From 19cb2349a6e0621f37574d0032c7201e9e530fe6 Mon Sep 17 00:00:00 2001 From: Alberto Faria Date: Mon, 22 Apr 2024 17:31:38 +0100 Subject: [PATCH 1/7] Tolerate images with entrypoint /sbin/init and similar Signed-off-by: Alberto Faria --- src/commands/create/custom_opts.rs | 9 ++++++++- src/commands/create/mod.rs | 7 ++++++- src/commands/exec.rs | 10 +++++++++- src/util.rs | 19 +++++++++++++++++++ 4 files changed, 42 insertions(+), 3 deletions(-) diff --git a/src/commands/create/custom_opts.rs b/src/commands/create/custom_opts.rs index b62b820..d5e9ac8 100644 --- a/src/commands/create/custom_opts.rs +++ b/src/commands/create/custom_opts.rs @@ -87,7 +87,14 @@ impl CustomOptions { .collect(); if let Some(&first_arg) = args.first() { - if first_arg == "no-entrypoint" { + let ignore = [ + "no-entrypoint", + "/sbin/init", + "/usr/sbin/init", + "/usr/local/sbin/init", + ]; + + if ignore.contains(&first_arg.as_str()) { args.remove(0); } } diff --git a/src/commands/create/mod.rs b/src/commands/create/mod.rs index 7abec90..6fd7204 100644 --- a/src/commands/create/mod.rs +++ b/src/commands/create/mod.rs @@ -23,7 +23,8 @@ use crate::commands::create::first_boot::FirstBootConfig; use crate::commands::create::runtime_env::RuntimeEnv; use crate::util::{ bind_mount_dir_with_different_context, bind_mount_file, create_overlay_vm_image, crun, - find_single_file_in_dirs, is_mountpoint, set_file_context, SpecExt, VmImageInfo, + find_single_file_in_dirs, fix_selinux_label, is_mountpoint, set_file_context, SpecExt, + VmImageInfo, }; pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Result<()> { @@ -174,9 +175,13 @@ fn set_up_container_root( spec.set_process({ let mut process = spec.process().clone().unwrap(); + process.set_cwd(".".into()); process.set_command_line(None); process.set_args(Some(command.into_iter().map(String::from).collect())); + + fix_selinux_label(&mut process); + Some(process) }); diff --git a/src/commands/exec.rs b/src/commands/exec.rs index 
6f983a6..c7f035b 100644 --- a/src/commands/exec.rs +++ b/src/commands/exec.rs @@ -8,11 +8,13 @@ use std::io::{BufReader, BufWriter}; use anyhow::{bail, Result}; use clap::Parser; -use crate::util::crun; +use crate::util::{crun, fix_selinux_label}; pub fn exec(args: &liboci_cli::Exec, raw_args: &[impl AsRef]) -> Result<()> { assert!(args.command.is_empty()); + // load exec process config + let process_config_path = args.process.as_ref().expect("process config"); let mut process: oci_spec::runtime::Process = serde_json::from_reader(File::open(process_config_path).map(BufReader::new)?)?; @@ -22,11 +24,17 @@ pub fn exec(args: &liboci_cli::Exec, raw_args: &[impl AsRef]) -> Result<( let new_command = build_command(command)?; process.set_args(Some(new_command)); + fix_selinux_label(&mut process); + + // store modified exec process config + serde_json::to_writer( File::create(process_config_path).map(BufWriter::new)?, &process, )?; + // actually exec + crun(raw_args)?; Ok(()) diff --git a/src/util.rs b/src/util.rs index 107e9d6..fc0cde4 100644 --- a/src/util.rs +++ b/src/util.rs @@ -6,12 +6,31 @@ use std::io::{self, ErrorKind}; use std::os::unix::ffi::OsStrExt; use std::os::unix::fs::{MetadataExt, PermissionsExt}; use std::process::{Command, Stdio}; +use std::str; use anyhow::{anyhow, bail, ensure, Result}; use camino::{Utf8Path, Utf8PathBuf}; use nix::mount::{MntFlags, MsFlags}; use serde::Deserialize; +// When the container image's entrypoint is /sbin/init or similar, Podman gives the entrypoint (and +// exec entrypoint) process an SELinux label of, for instance: +// +// system_u:system_r:container_init_t:s0:c276,c638 +// +// However, we are going to change our entrypoint to something else, so we need to use the +// "standard" label that Podman otherwise gives, which in this case would be: +// +// system_u:system_r:container_t:s0:c276,c638 +// +// This function performs that mapping. 
+pub fn fix_selinux_label(process: &mut oci_spec::runtime::Process) { + if let Some(label) = process.selinux_label() { + let new_label = label.replace("container_init_t", "container_t"); + process.set_selinux_label(Some(new_label)); + } +} + pub fn set_file_context(path: impl AsRef, context: &str) -> Result<()> { extern "C" { fn setfilecon(path: *const c_char, con: *const c_char) -> i32; From a9118ee63c76bf3b0f674eb164b35090f383f862 Mon Sep 17 00:00:00 2001 From: Alberto Faria Date: Mon, 4 Mar 2024 18:53:16 +0000 Subject: [PATCH 2/7] Add support for running bootc bootable containers We attempt to detect if a container image is bootable. We can't easily retrieve the image's labels, so we check if /usr/lib/bootc/install exists and is a directory. If so, it is a bootable container. If it is a bootable container but we're not running under Podman, we fail with an error. Once our container's entrypoint starts running, a background process on the host (outside the container) queries Podman for the image's name and ID, which the OCI runtime does not get but bootc-install needs. It then saves the container image as an OCI archive. It then runs the original container to generate the VM image. We do this using krun [1] so that elevated privileges aren't necessary. Our entrypoint blocks until this is done, and all subsequent logic remains the same. We could potentially avoid the OCI archive creation step by mounting the host's container storage into the container running under krun. This isn't trivial to achieve due to SELinux label and context mismatches between the host and the krun environment, so we leave this optimization for a future date. Closes #26. 
[1] https://github.com/containers/crun/blob/main/krun.1.md Signed-off-by: Alberto Faria --- docs/1-installing.md | 2 +- docs/2-podman-docker.md | 19 ++++ embed/bootc/config.json | 88 +++++++++++++++++ embed/bootc/entrypoint.sh | 51 ++++++++++ embed/bootc/prepare.sh | 64 ++++++++++++ {scripts => embed}/entrypoint.sh | 18 ++++ {scripts => embed}/exec.sh | 1 - {scripts => embed}/virtiofsd.sh | 0 plans/tests.fmf | 1 + src/commands/create/mod.rs | 161 ++++++++++++++++++++++++------- src/util.rs | 13 ++- tests/env.sh | 21 ++-- tests/t/cloud-init.sh | 50 +++++----- tests/t/hostname.sh | 2 +- tests/t/mount.sh | 2 +- tests/t/publish.sh | 41 ++++---- 16 files changed, 440 insertions(+), 94 deletions(-) create mode 100644 embed/bootc/config.json create mode 100644 embed/bootc/entrypoint.sh create mode 100644 embed/bootc/prepare.sh rename {scripts => embed}/entrypoint.sh (87%) rename {scripts => embed}/exec.sh (94%) rename {scripts => embed}/virtiofsd.sh (100%) diff --git a/docs/1-installing.md b/docs/1-installing.md index b1c2822..44cb689 100644 --- a/docs/1-installing.md +++ b/docs/1-installing.md @@ -35,7 +35,7 @@ To also set up crun-vm for use with Docker: 1. Install crun-vm's runtime dependencies: ```console - $ dnf install bash coreutils crun genisoimage grep libselinux-devel libvirt-client libvirt-daemon-driver-qemu libvirt-daemon-log openssh-clients qemu-img qemu-system-x86-core shadow-utils util-linux virtiofsd + $ dnf install bash coreutils crun crun-krun genisoimage grep libselinux-devel libvirt-client libvirt-daemon-driver-qemu libvirt-daemon-log openssh-clients qemu-img qemu-system-x86-core sed shadow-utils util-linux virtiofsd ``` 2. Install Rust and Cargo if you do not already have Rust tooling available: diff --git a/docs/2-podman-docker.md b/docs/2-podman-docker.md index 47be99c..4608bfe 100644 --- a/docs/2-podman-docker.md +++ b/docs/2-podman-docker.md @@ -96,6 +96,21 @@ in a container image. 
Note that flag `--persistent` has no effect when running VMs from container images. +### From bootable container images + +crun-vm can also work with [bootable container images], which are containers +that package a full operating system: + +```console +$ podman run \ + --runtime crun-vm \ + -it --rm \ + quay.io/crun-vm/example-fedora-bootc:40 +``` + +Internally, crun-vm generates a VM image from the bootable container and then +boots it. + ## First-boot customization ### cloud-init @@ -320,6 +335,9 @@ To use system emulation instead of hardware-assisted virtualization, specify the `--emulated` flag. Without this flag, attempting to create a VM on a host tbat doesn't support KVM will fail. +It's not currently possible to use this flag when the container image is a bootc +bootable container. + ### Inspecting and customizing the libvirt domain XML crun-vm internally uses [libvirt] to launch a VM, generating a [domain XML @@ -340,6 +358,7 @@ be merged with it using the non-standard option `--merge-libvirt-xml `. > Before using this flag, consider if you would be better served using libvirt > directly to manage your VM. 
+[bootable container images]: https://containers.github.io/bootable/ [cloud-init]: https://cloud-init.io/ [domain XML definition]: https://libvirt.org/formatdomain.html [Ignition]: https://coreos.github.io/ignition/ diff --git a/embed/bootc/config.json b/embed/bootc/config.json new file mode 100644 index 0000000..a40fc6c --- /dev/null +++ b/embed/bootc/config.json @@ -0,0 +1,88 @@ +{ + "ociVersion": "1.0.0", + "process": { + "terminal": true, + "user": { "uid": 0, "gid": 0 }, + "args": ["/output/entrypoint.sh", ""], + "env": [ + "PATH=/usr/local/sbin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin", + "TERM=xterm" + ], + "cwd": "/", + "capabilities": { + "bounding": [], + "effective": [], + "inheritable": [], + "permitted": [], + "ambient": [] + }, + "rlimits": [ + { + "type": "RLIMIT_NOFILE", + "hard": 262144, + "soft": 262144 + } + ], + "noNewPrivileges": true + }, + "root": { + "path": "", + "readonly": false + }, + "hostname": "bootc-install", + "mounts": [ + { + "type": "bind", + "source": "/root/crun-vm/bootc", + "destination": "/output", + "options": ["bind", "rprivate", "rw"] + }, + { + "destination": "/proc", + "type": "proc", + "source": "proc" + }, + { + "destination": "/dev/pts", + "type": "devpts", + "source": "devpts", + "options": [ + "nosuid", + "noexec", + "newinstance", + "ptmxmode=0666", + "mode=0620", + "gid=5" + ] + } + ], + "linux": { + "namespaces": [ + { "type": "pid" }, + { "type": "network" }, + { "type": "ipc" }, + { "type": "uts" }, + { "type": "cgroup" }, + { "type": "mount" } + ], + "maskedPaths": [ + "/proc/acpi", + "/proc/asound", + "/proc/kcore", + "/proc/keys", + "/proc/latency_stats", + "/proc/timer_list", + "/proc/timer_stats", + "/proc/sched_debug", + "/sys/firmware", + "/proc/scsi" + ], + "readonlyPaths": [ + "/proc/bus", + "/proc/fs", + "/proc/irq", + "/proc/sys", + "/proc/sysrq-trigger" + ] + } +} diff --git a/embed/bootc/entrypoint.sh b/embed/bootc/entrypoint.sh new file mode 100644 index 0000000..f5cbdd5 --- /dev/null +++ 
b/embed/bootc/entrypoint.sh @@ -0,0 +1,51 @@ +#!/bin/sh +# SPDX-License-Identifier: GPL-2.0-or-later + +set -e + +image_name=$1 + +# monkey-patch loopdev partition detection, given we're not running systemd +# (bootc runs `udevadm settle` as a way to wait until loopdev partitions are +# detected; we hijack that call and use partx to set up the partition devices) + +original_udevadm=$( which udevadm ) + +mkdir -p /output/bin + +cat >/output/bin/udevadm </usr/lib/bootc/install/00-crun-vm.toml <<-EOF + [install.filesystem.root] + type = "xfs" +EOF + +fi + +# build disk image using bootc-install + +PATH=/output/bin:$PATH bootc install to-disk \ + --source-imgref oci-archive:/output/image.oci-archive \ + --target-imgref "$image_name" \ + --skip-fetch-check \ + --generic-image \ + --via-loopback \ + --karg console=tty0 \ + --karg console=ttyS0 \ + --karg selinux=0 \ + /output/image.raw + +# communicate success by creating a file, since krun always exits successfully + +touch /output/success diff --git a/embed/bootc/prepare.sh b/embed/bootc/prepare.sh new file mode 100644 index 0000000..e1124d9 --- /dev/null +++ b/embed/bootc/prepare.sh @@ -0,0 +1,64 @@ +#!/bin/bash +# SPDX-License-Identifier: GPL-2.0-or-later + +set -o errexit -o pipefail -o nounset + +original_root=$1 +priv_dir=$2 +container_id=$3 + +__step() { + printf "\033[36m%s\033[0m\n" "$*" +} + +bootc_dir=$priv_dir/root/crun-vm/bootc + +mkfifo "$bootc_dir/progress" +exec > "$bootc_dir/progress" 2>&1 + +# this blocks here until the named pipe above is opened by entrypoint.sh + +# get info about the container *image* + +__step 'Storing the container image as an OCI archive...' 
+ +image_info=$( + podman container inspect \ + --format '{{.ImageName}}\t{{.Image}}' \ + "$container_id" + ) + +image_name=$( cut -f1 <<< "$image_info" ) +image_id=$( cut -f2 <<< "$image_info" ) + +oci_archive=$bootc_dir/image.oci-archive + +# save container *image* as an OCI archive + +podman save --format oci-archive --output "$oci_archive.tmp" "$image_id" " "$image_name" +__sed "" "$original_root" +__sed "" "$priv_dir" + +# run bootc-install under krun + +truncate --size 10G "$bootc_dir/image.raw" # TODO: allow adjusting disk size + +krun run \ + --config "$bootc_dir/config.json" \ + "crun-vm-$container_id" \ + ]) -> Resu let config_path = bundle_path.join("config.json"); let mut spec = oci_spec::runtime::Spec::load(&config_path)?; - let original_root_path: Utf8PathBuf = spec.root_path()?.canonicalize()?.try_into()?; // ensure absolute - - if let Some(process) = spec.process().as_ref() { - if let Some(capabilities) = process.capabilities().as_ref() { - fn any_is_cap_sys_admin(caps: &Option) -> bool { - caps.as_ref() - .is_some_and(|set| set.contains(&oci_spec::runtime::Capability::SysAdmin)) - } + ensure_unprivileged(&spec)?; - ensure!( - !any_is_cap_sys_admin(capabilities.bounding()) - && !any_is_cap_sys_admin(capabilities.effective()) - && !any_is_cap_sys_admin(capabilities.inheritable()) - && !any_is_cap_sys_admin(capabilities.permitted()) - && !any_is_cap_sys_admin(capabilities.ambient()), - "crun-vm is incompatible with privileged containers" - ); - } - } + let original_root_path: Utf8PathBuf = spec.root_path()?.canonicalize()?.try_into()?; // ensure absolute let runtime_env = RuntimeEnv::current(&spec, &original_root_path)?; let custom_options = CustomOptions::from_spec(&spec, runtime_env)?; + let is_bootc_container = is_bootc_container( + &args.container_id, + bundle_path, + &original_root_path, + runtime_env, + )?; + + ensure!( + !is_bootc_container || !custom_options.emulated, + "--emulated is incompatible with bootable containers" + ); + // We include 
container_id in our paths to ensure no overlap with the user container's contents. let priv_dir_path = original_root_path.join(format!("crun-vm-{}", args.container_id)); fs::create_dir_all(&priv_dir_path)?; @@ -66,7 +64,13 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu set_file_context(&priv_dir_path, context)?; } - set_up_container_root(&mut spec, &priv_dir_path, &custom_options)?; + set_up_container_root( + &mut spec, + &priv_dir_path, + &custom_options, + is_bootc_container, + )?; + let is_first_create = is_first_create(&spec)?; let base_vm_image_info = set_up_vm_image( @@ -75,6 +79,7 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu &priv_dir_path, &custom_options, is_first_create, + is_bootc_container, )?; let mut mounts = Mounts::default(); @@ -105,9 +110,84 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu crun(raw_args)?; // actually create container + if is_first_create && is_bootc_container { + // We want to ask podman what our image name is, so we can give it to bootc-install, but we + // can't wait synchronously for a response since podman hangs until this create command + // completes. We then want to run bootc-install under krun, which already isolates the + // workload and so can be run outside of our container. We thus launch a process that + // asynchronously performs these steps, and share its progress and output with our + // container's entrypoint through a named pipe. + // + // Note that this process blocks until our container's entrypoint actually starts running, + // thus after the "start" OCI runtime command is called.
+ + let bootc_dir = priv_dir_path.join("root/crun-vm/bootc"); + fs::create_dir_all(&bootc_dir)?; + + std::process::Command::new(bootc_dir.join("prepare.sh")) + .arg(&original_root_path) + .arg(&priv_dir_path) + .arg(&args.container_id) + .stdin(Stdio::null()) + .stdout(Stdio::null()) + .stderr(Stdio::null()) + .spawn()?; + } + Ok(()) } +fn ensure_unprivileged(spec: &oci_spec::runtime::Spec) -> Result<()> { + if let Some(process) = spec.process().as_ref() { + if let Some(capabilities) = process.capabilities().as_ref() { + fn any_is_cap_sys_admin(caps: &Option) -> bool { + caps.as_ref() + .is_some_and(|set| set.contains(&oci_spec::runtime::Capability::SysAdmin)) + } + + ensure!( + !any_is_cap_sys_admin(capabilities.bounding()) + && !any_is_cap_sys_admin(capabilities.effective()) + && !any_is_cap_sys_admin(capabilities.inheritable()) + && !any_is_cap_sys_admin(capabilities.permitted()) + && !any_is_cap_sys_admin(capabilities.ambient()), + "crun-vm is incompatible with privileged containers" + ); + } + } + + Ok(()) +} + +fn is_bootc_container( + container_id: &str, + bundle_path: &Utf8Path, + original_root_path: &Utf8Path, + env: RuntimeEnv, +) -> Result { + lazy_static! 
{ + static ref PATTERN: Regex = Regex::new(r"/overlay-containers/([^/]+)/userdata$").unwrap(); + } + + let is_bootc_container = original_root_path.join("usr/lib/bootc/install").is_dir(); + + if is_bootc_container { + // check as much as we can that we're running under podman + + let is_podman_bundle_path = match PATTERN.captures(bundle_path.as_str()) { + Some(captures) => &captures[1] == container_id, + None => false, + }; + + ensure!( + env == RuntimeEnv::Other && is_podman_bundle_path, + "bootc containers are only supported with Podman" + ); + } + + Ok(is_bootc_container) +} + fn is_first_create(spec: &oci_spec::runtime::Spec) -> Result { let path = spec.root_path()?.join("crun-vm/create-ran"); @@ -129,6 +209,7 @@ fn set_up_container_root( spec: &mut oci_spec::runtime::Spec, priv_dir_path: &Utf8Path, custom_options: &CustomOptions, + is_bootc_container: bool, ) -> Result<()> { let new_root_path = priv_dir_path.join("root"); fs::create_dir_all(&new_root_path)?; @@ -148,19 +229,22 @@ fn set_up_container_root( .unwrap(), )); - // set up container scripts + // set up container files #[derive(RustEmbed)] - #[folder = "scripts/"] - struct Scripts; + #[folder = "embed/"] + struct Embed; - for path in Scripts::iter() { + for path in Embed::iter() { let path_in_host = new_root_path.join("crun-vm").join(path.as_ref()); fs::create_dir_all(path_in_host.parent().unwrap())?; - let file = Scripts::get(&path).unwrap(); + let file = Embed::get(&path).unwrap(); fs::write(&path_in_host, file.data)?; - fs::set_permissions(&path_in_host, Permissions::from_mode(0o755))?; + + let is_script = path.as_ref().ends_with(".sh"); + let mode = if is_script { 0o755 } else { 0o644 }; + fs::set_permissions(&path_in_host, Permissions::from_mode(mode))?; } // configure container entrypoint @@ -170,7 +254,8 @@ fn set_up_container_root( } else if custom_options.print_config_json { vec!["cat", "/crun-vm/config.json"] } else { - vec!["/crun-vm/entrypoint.sh"] + let arg = if is_bootc_container { "1" } 
else { "0" }; + vec!["/crun-vm/entrypoint.sh", arg] }; spec.set_process({ @@ -194,7 +279,20 @@ fn set_up_vm_image( priv_dir_path: &Utf8Path, custom_options: &CustomOptions, is_first_create: bool, + is_bootc_container: bool, ) -> Result { + let mirror_vm_image_path_in_container = Utf8PathBuf::from("/crun-vm/image/image"); + let mirror_vm_image_path_in_host = spec.root_path()?.join("crun-vm/image/image"); + + if is_bootc_container { + // the image will be generated later + return Ok(VmImageInfo { + path: mirror_vm_image_path_in_container, + size: 0, + format: "raw".to_string(), + }); + } + // where inside the container to look for the VM image const VM_IMAGE_SEARCH_PATHS: [&str; 2] = ["./", "disk/"]; @@ -218,9 +316,6 @@ fn set_up_vm_image( fs::hard_link(vm_image_path_in_host, image_dir_path.join("image"))?; } - let mirror_vm_image_path_in_container = Utf8PathBuf::from("/crun-vm/image/image"); - let mirror_vm_image_path_in_host = spec.root_path()?.join("crun-vm/image/image"); - if custom_options.persistent { // Mount overlayfs to expose the user's VM image file with a different SELinux context so we // can always access it, using the file's parent as the upperdir so that writes still @@ -230,7 +325,7 @@ fn set_up_vm_image( bind_mount_dir_with_different_context( image_dir_path, mirror_vm_image_path_in_host.parent().unwrap(), - priv_dir_path.join("scratch"), + priv_dir_path.join("scratch-image"), spec.mount_label(), false, )?; @@ -253,7 +348,7 @@ fn set_up_vm_image( bind_mount_dir_with_different_context( image_dir_path, mirror_vm_image_path_in_host.parent().unwrap(), - priv_dir_path.join("scratch"), + priv_dir_path.join("scratch-image"), spec.mount_label(), true, )?; @@ -580,7 +675,7 @@ fn set_up_security(spec: &mut oci_spec::runtime::Spec) { // TODO: This doesn't seem reasonable at all. Should we just force users to use a different // seccomp profile? 
Should passt provide the option to bypass a lot of the isolation that it // does, given we're already in a container *and* under a seccomp profile? - spec.linux_seccomp_syscalls_push( + spec.linux_seccomp_syscalls_push_front( oci_spec::runtime::LinuxSyscallBuilder::default() .names(["mount", "pivot_root", "umount2", "unshare"].map(String::from)) .action(oci_spec::runtime::LinuxSeccompAction::ScmpActAllow) diff --git a/src/util.rs b/src/util.rs index fc0cde4..84ce83f 100644 --- a/src/util.rs +++ b/src/util.rs @@ -33,13 +33,13 @@ pub fn fix_selinux_label(process: &mut oci_spec::runtime::Process) { pub fn set_file_context(path: impl AsRef, context: &str) -> Result<()> { extern "C" { - fn setfilecon(path: *const c_char, con: *const c_char) -> i32; + fn lsetfilecon(path: *const c_char, con: *const c_char) -> i32; } let path = CString::new(path.as_ref().as_os_str().as_bytes())?; let context = CString::new(context.as_bytes())?; - if unsafe { setfilecon(path.as_ptr(), context.as_ptr()) } != 0 { + if unsafe { lsetfilecon(path.as_ptr(), context.as_ptr()) } != 0 { return Err(io::Error::last_os_error().into()); } @@ -198,7 +198,7 @@ pub trait SpecExt { linux_device_cgroup: oci_spec::runtime::LinuxDeviceCgroup, ); fn process_capabilities_insert_beip(&mut self, capability: oci_spec::runtime::Capability); - fn linux_seccomp_syscalls_push(&mut self, linux_syscall: oci_spec::runtime::LinuxSyscall); + fn linux_seccomp_syscalls_push_front(&mut self, linux_syscall: oci_spec::runtime::LinuxSyscall); } impl SpecExt for oci_spec::runtime::Spec { @@ -276,7 +276,10 @@ impl SpecExt for oci_spec::runtime::Spec { }); } - fn linux_seccomp_syscalls_push(&mut self, linux_syscall: oci_spec::runtime::LinuxSyscall) { + fn linux_seccomp_syscalls_push_front( + &mut self, + linux_syscall: oci_spec::runtime::LinuxSyscall, + ) { self.set_linux({ let mut linux = self.linux().clone().expect("linux config"); linux.set_seccomp({ @@ -284,7 +287,7 @@ impl SpecExt for oci_spec::runtime::Spec { if let 
Some(seccomp) = &mut seccomp { seccomp.set_syscalls({ let mut syscalls = seccomp.syscalls().clone().unwrap_or_default(); - syscalls.push(linux_syscall); + syscalls.insert(0, linux_syscall); Some(syscalls) }); } diff --git a/tests/env.sh b/tests/env.sh index f6ed1d2..7400856 100755 --- a/tests/env.sh +++ b/tests/env.sh @@ -11,20 +11,23 @@ container_name=crun-vm-test-env declare -A TEST_IMAGES TEST_IMAGES=( - [fedora]=quay.io/containerdisks/fedora:40 # uses cloud-init - [coreos]=quay.io/crun-vm/example-fedora-coreos:40 # uses Ignition + [fedora]=quay.io/containerdisks/fedora:40 # uses cloud-init + [coreos]=quay.io/crun-vm/example-fedora-coreos:40 # uses Ignition + [fedora-bootc]=quay.io/crun-vm/example-fedora-bootc:40 # bootable container ) declare -A TEST_IMAGES_DEFAULT_USER TEST_IMAGES_DEFAULT_USER=( [fedora]=fedora [coreos]=core + [fedora-bootc]=fedora ) declare -A TEST_IMAGES_DEFAULT_USER_HOME TEST_IMAGES_DEFAULT_USER_HOME=( [fedora]=/home/fedora [coreos]=/var/home/core + [fedora-bootc]=/var/home/cloud-user ) __bad_usage() { @@ -140,12 +143,12 @@ build) # expand base image - __log_and_run qemu-img create -f qcow2 "$temp_dir/resized-image.qcow2" 20G + __log_and_run qemu-img create -f qcow2 "$temp_dir/image.qcow2" 50G __log_and_run virt-resize \ --quiet \ --expand /dev/sda4 \ "$temp_dir/image" \ - "$temp_dir/resized-image.qcow2" + "$temp_dir/image.qcow2" rm "$temp_dir/image" @@ -179,6 +182,7 @@ build) bash \ coreutils \ crun \ + crun-krun \ docker \ genisoimage \ grep \ @@ -210,17 +214,12 @@ build) __log_and_run podman wait --ignore "$container_name-build" __extra_cleanup() { :; } - __log_and_run virt-sparsify \ - --quiet \ - "$temp_dir/resized-image.qcow2" \ - "$temp_dir/final-image.qcow2" - - rm "$temp_dir/resized-image.qcow2" + __log_and_run virt-sparsify --quiet --in-place "$temp_dir/image.qcow2" # package new image file __log_and_run "$( __rel "$repo_root/util/package-vm-image.sh" )" \ - "$temp_dir/final-image.qcow2" \ + "$temp_dir/image.qcow2" \ "$env_image" 
__big_log 33 'Done.' diff --git a/tests/t/cloud-init.sh b/tests/t/cloud-init.sh index 6ea51dd..5ac77e1 100644 --- a/tests/t/cloud-init.sh +++ b/tests/t/cloud-init.sh @@ -1,30 +1,36 @@ # SPDX-License-Identifier: GPL-2.0-or-later -image="${TEST_IMAGES[fedora]}" -user="${TEST_IMAGES_DEFAULT_USER[fedora]}" -home="${TEST_IMAGES_DEFAULT_USER_HOME[fedora]}" - -cat >"$TEMP_DIR/user-data" <"$TEMP_DIR/user-data" <<-EOF + #cloud-config + write_files: + - path: $home/file + content: | + hello EOF -cat >"$TEMP_DIR/meta-data" <"$TEMP_DIR/meta-data" <<-EOF EOF -__engine run \ - --rm --detach \ - --name cloud-init \ - "$image" \ - --cloud-init "$TEMP_DIR" + __engine run \ + --rm --detach \ + --name cloud-init \ + "$image" \ + --cloud-init "$TEMP_DIR" + + __test() { + __engine exec cloud-init --as "$user" "cmp $home/file <<< hello" + } + + __test + __engine restart cloud-init + __test -__test() { - __engine exec cloud-init --as "$user" "cmp $home/file <<< hello" -} + __engine stop cloud-init -__test -__engine restart cloud-init -__test +done diff --git a/tests/t/hostname.sh b/tests/t/hostname.sh index 3b1039c..c185f76 100644 --- a/tests/t/hostname.sh +++ b/tests/t/hostname.sh @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -for os in fedora coreos; do +for os in "${!TEST_IMAGES[@]}"; do image="${TEST_IMAGES[$os]}" user="${TEST_IMAGES_DEFAULT_USER[$os]}" diff --git a/tests/t/mount.sh b/tests/t/mount.sh index cebb2e5..4be1085 100644 --- a/tests/t/mount.sh +++ b/tests/t/mount.sh @@ -1,6 +1,6 @@ # SPDX-License-Identifier: GPL-2.0-or-later -for os in fedora coreos; do +for os in "${!TEST_IMAGES[@]}"; do image="${TEST_IMAGES[$os]}" user="${TEST_IMAGES_DEFAULT_USER[$os]}" diff --git a/tests/t/publish.sh b/tests/t/publish.sh index 4c9b98a..45ef70a 100644 --- a/tests/t/publish.sh +++ b/tests/t/publish.sh @@ -1,30 +1,33 @@ # SPDX-License-Identifier: GPL-2.0-or-later -image="${TEST_IMAGES[fedora]}" -user="${TEST_IMAGES_DEFAULT_USER[fedora]}" +trap '__engine stop publish' EXIT 
-__engine run \ - --rm --detach \ - --name publish \ - --publish 127.0.0.1::8000 \ - "$image" +for os in fedora fedora-bootc; do -endpoint=$( __engine port publish | tee /dev/stderr | cut -d' ' -f3 ) + image="${TEST_IMAGES[$os]}" + user="${TEST_IMAGES_DEFAULT_USER[$os]}" -__engine exec publish --as "$user" + __engine run --rm --detach --name publish --publish 127.0.0.1::8000 "$image" -__log 'Ensuring curl fails...' -! curl "$endpoint" 2>/dev/null + endpoint=$( __engine port publish | tee /dev/stderr | cut -d' ' -f3 ) -__engine exec publish --as "$user" python -m http.server & -trap '__engine stop publish' EXIT + __engine exec publish --as "$user" + + __log 'Ensuring curl fails...' + ! curl "$endpoint" 2>/dev/null + + __engine exec publish --as "$user" python -m http.server & + + __log 'Ensuring curl succeeds...' + + i=0 + max_tries=30 -__log 'Ensuring curl succeeds...' + until [[ "$( curl "$endpoint" 2>/dev/null )" == ''* ]]; do + (( ++i < max_tries )) + sleep 1 + done -i=0 -max_tries=30 + __engine stop publish -until [[ "$( curl "$endpoint" 2>/dev/null )" == ''* ]]; do - (( ++i < max_tries )) - sleep 1 done From ad26d7fe955efbc72f30551bd5756ab8fc470286 Mon Sep 17 00:00:00 2001 From: Alberto Faria Date: Fri, 26 Apr 2024 10:51:29 +0100 Subject: [PATCH 3/7] Cache VM images generated from bootc container images Store them as untagged containerdisks in the user's container storage. They appear in the output of `podman images`, making users aware of their existence and size on disk, and cleanup commands like `podman image prune` remove them. 
Signed-off-by: Alberto Faria --- embed/bootc/entrypoint.sh | 2 +- embed/bootc/prepare.sh | 94 +++++++++++++++++++++++++++++--------- embed/entrypoint.sh | 2 +- src/commands/create/mod.rs | 2 +- 4 files changed, 75 insertions(+), 25 deletions(-) diff --git a/embed/bootc/entrypoint.sh b/embed/bootc/entrypoint.sh index f5cbdd5..77983ba 100644 --- a/embed/bootc/entrypoint.sh +++ b/embed/bootc/entrypoint.sh @@ -48,4 +48,4 @@ PATH=/output/bin:$PATH bootc install to-disk \ # communicate success by creating a file, since krun always exits successfully -touch /output/success +touch /output/bootc-install-success diff --git a/embed/bootc/prepare.sh b/embed/bootc/prepare.sh index e1124d9..c2a4df2 100644 --- a/embed/bootc/prepare.sh +++ b/embed/bootc/prepare.sh @@ -20,8 +20,6 @@ exec > "$bootc_dir/progress" 2>&1 # get info about the container *image* -__step 'Storing the container image as an OCI archive...' - image_info=$( podman container inspect \ --format '{{.ImageName}}\t{{.Image}}' \ @@ -31,34 +29,86 @@ image_info=$( image_name=$( cut -f1 <<< "$image_info" ) image_id=$( cut -f2 <<< "$image_info" ) -oci_archive=$bootc_dir/image.oci-archive +# check if VM image is cached -# save container *image* as an OCI archive +container_name=crun-vm-$container_id -podman save --format oci-archive --output "$oci_archive.tmp" "$image_id" /dev/null 2>&1 || true' EXIT + + podman create --quiet --name "$container_name" "$cache_image_id" /dev/null + podman export "$container_name" | tar -C "$bootc_dir" -x image.qcow2 + podman rm "$container_name" >/dev/null 2>&1 + + trap '' EXIT + +else + + __step "Converting $image_name into a VM image..." + + # save container *image* as an OCI archive + + echo -n 'Preparing container image...' 
+ + podman save \ + --format oci-archive \ + --output "$bootc_dir/image.oci-archive" \ + "$image_id" \ + &1 \ + | sed -u 's/.*/./' \ + | stdbuf -o0 tr -d '\n' + + echo -__sed "" "$image_name" -__sed "" "$original_root" -__sed "" "$priv_dir" + # adjust krun config -# run bootc-install under krun + __sed() { + sed -i "s|$1|$2|" "$bootc_dir/config.json" + } + + __sed "" "$image_name" + __sed "" "$original_root" + __sed "" "$priv_dir" + + # run bootc-install under krun + + truncate --size 10G "$bootc_dir/image.raw" # TODO: allow adjusting disk size + + trap 'krun delete --force "$container_name" >/dev/null 2>&1 || true' EXIT + krun run --config "$bootc_dir/config.json" "$container_name" Date: Fri, 26 Apr 2024 16:20:00 +0100 Subject: [PATCH 4/7] Improve engine detection logic Signed-off-by: Alberto Faria --- src/commands/create/custom_opts.rs | 6 +- src/commands/create/engine.rs | 99 ++++++++++++++++++++++++++++++ src/commands/create/mod.rs | 52 ++++------------ src/commands/create/runtime_env.rs | 51 --------------- 4 files changed, 115 insertions(+), 93 deletions(-) create mode 100644 src/commands/create/engine.rs delete mode 100644 src/commands/create/runtime_env.rs diff --git a/src/commands/create/custom_opts.rs b/src/commands/create/custom_opts.rs index d5e9ac8..6877c2d 100644 --- a/src/commands/create/custom_opts.rs +++ b/src/commands/create/custom_opts.rs @@ -9,7 +9,7 @@ use clap::Parser; use lazy_static::lazy_static; use regex::Regex; -use crate::commands::create::runtime_env::RuntimeEnv; +use crate::commands::create::engine::Engine; #[derive(Clone, Debug)] pub struct Blockdev { @@ -75,7 +75,7 @@ pub struct CustomOptions { } impl CustomOptions { - pub fn from_spec(spec: &oci_spec::runtime::Spec, env: RuntimeEnv) -> Result { + pub fn from_spec(spec: &oci_spec::runtime::Spec, engine: Engine) -> Result { let mut args: Vec<&String> = spec .process() .as_ref() @@ -158,7 +158,7 @@ impl CustomOptions { ), ); - if env == RuntimeEnv::Kubernetes { + if engine == 
Engine::Kubernetes { for blockdev in &mut options.blockdev { blockdev.source = path_in_container_into_path_in_host(spec, &blockdev.source)?; blockdev.target = path_in_container_into_path_in_host(spec, &blockdev.target)?; diff --git a/src/commands/create/engine.rs b/src/commands/create/engine.rs new file mode 100644 index 0000000..a004a3b --- /dev/null +++ b/src/commands/create/engine.rs @@ -0,0 +1,99 @@ +// SPDX-License-Identifier: GPL-2.0-or-later + +use std::fs; +use std::path::Path; + +use anyhow::{bail, Result}; +use camino::Utf8Path; +use lazy_static::lazy_static; +use regex::Regex; + +#[derive(Clone, Copy, Debug, Eq, PartialEq)] +pub enum Engine { + Podman, + Docker, + Kubernetes, +} + +impl Engine { + pub fn detect( + container_id: &str, + bundle_path: &Utf8Path, + spec: &oci_spec::runtime::Spec, + original_root_path: impl AsRef, + ) -> Result { + // TODO: Make this absolutely robust and secure. Probably require engine config to pass us + // an option specifying what engine is running crun-vm. + + // check if we're under CRI-O under Kubernetes + + { + let has_kubernetes_secrets_dir = spec.mounts().iter().flatten().any(|m| { + m.destination() + .starts_with("/var/run/secrets/kubernetes.io") + }); + + let has_kubernetes_managed_etc_hosts = spec + .mounts() + .iter() + .flatten() + .filter(|m| m.destination() == Utf8Path::new("/etc/hosts")) + .flat_map(|m| m.source()) + .next() + .map(fs::read_to_string) + .transpose()? 
+ .and_then(|hosts| hosts.lines().next().map(|line| line.to_string())) + .map(|line| line.contains("Kubernetes-managed hosts file")) + .unwrap_or(false); + + if has_kubernetes_secrets_dir || has_kubernetes_managed_etc_hosts { + return Ok(Engine::Kubernetes); + } + } + + // check if we're under Docker + + { + let has_dot_dockerenv_file = original_root_path + .as_ref() + .join(".dockerenv") + .try_exists()?; + + if has_dot_dockerenv_file { + return Ok(Engine::Docker); + } + } + + // check if we're under Podman + + { + let has_mount_on = |p| { + spec.mounts() + .iter() + .flatten() + .any(|m| m.destination() == Path::new(p)) + }; + + let has_dot_containerenv_file = + has_mount_on("/run/.containerenv") || has_mount_on("/var/run/.containerenv"); + + lazy_static! { + static ref BUNDLE_PATH_PATTERN: Regex = + Regex::new(r"/overlay-containers/([^/]+)/userdata$").unwrap(); + } + + let is_podman_bundle_path = match BUNDLE_PATH_PATTERN.captures(bundle_path.as_str()) { + Some(captures) => &captures[1] == container_id, + None => false, + }; + + if has_dot_containerenv_file && is_podman_bundle_path { + return Ok(Engine::Podman); + } + } + + // unknown engine + + bail!("could not identify container engine; crun-vm current only supports Podman, Docker, and Kubernetes"); + } +} diff --git a/src/commands/create/mod.rs b/src/commands/create/mod.rs index 1fb75e8..fd5c6d6 100644 --- a/src/commands/create/mod.rs +++ b/src/commands/create/mod.rs @@ -2,8 +2,8 @@ mod custom_opts; mod domain; +mod engine; mod first_boot; -mod runtime_env; use std::ffi::OsStr; use std::fs::{self, File, Permissions}; @@ -14,15 +14,13 @@ use std::process::{Command, Stdio}; use anyhow::{anyhow, bail, ensure, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; -use lazy_static::lazy_static; use nix::sys::stat::{major, makedev, minor, mknod, Mode, SFlag}; -use regex::Regex; use rust_embed::RustEmbed; use crate::commands::create::custom_opts::CustomOptions; use 
crate::commands::create::domain::set_up_libvirt_domain_xml; +use crate::commands::create::engine::Engine; use crate::commands::create::first_boot::FirstBootConfig; -use crate::commands::create::runtime_env::RuntimeEnv; use crate::util::{ bind_mount_dir_with_different_context, bind_mount_file, create_overlay_vm_image, crun, find_single_file_in_dirs, fix_selinux_label, is_mountpoint, set_file_context, SpecExt, @@ -38,15 +36,9 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu let original_root_path: Utf8PathBuf = spec.root_path()?.canonicalize()?.try_into()?; // ensure absolute - let runtime_env = RuntimeEnv::current(&spec, &original_root_path)?; - let custom_options = CustomOptions::from_spec(&spec, runtime_env)?; - - let is_bootc_container = is_bootc_container( - &args.container_id, - bundle_path, - &original_root_path, - runtime_env, - )?; + let engine = Engine::detect(&args.container_id, bundle_path, &spec, &original_root_path)?; + let custom_options = CustomOptions::from_spec(&spec, engine)?; + let is_bootc_container = is_bootc_container(&original_root_path, engine)?; ensure!( !is_bootc_container || !custom_options.emulated, @@ -93,7 +85,7 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu let ssh_pub_key = set_up_ssh_key_pair( &mut spec, &custom_options, - runtime_env, + engine, &priv_dir_path, is_first_create, )?; @@ -159,31 +151,13 @@ fn ensure_unprivileged(spec: &oci_spec::runtime::Spec) -> Result<()> { Ok(()) } -fn is_bootc_container( - container_id: &str, - bundle_path: &Utf8Path, - original_root_path: &Utf8Path, - env: RuntimeEnv, -) -> Result { - lazy_static! 
{ - static ref PATTERN: Regex = Regex::new(r"/overlay-containers/([^/]+)/userdata$").unwrap(); - } - +fn is_bootc_container(original_root_path: &Utf8Path, engine: Engine) -> Result { let is_bootc_container = original_root_path.join("usr/lib/bootc/install").is_dir(); - if is_bootc_container { - // check as much as we can that we're running under podman - - let is_podman_bundle_path = match PATTERN.captures(bundle_path.as_str()) { - Some(captures) => &captures[1] == container_id, - None => false, - }; - - ensure!( - env == RuntimeEnv::Other && is_podman_bundle_path, - "bootc containers are only supported with Podman" - ); - } + ensure!( + !is_bootc_container || engine == Engine::Podman, + "bootc containers are only supported with Podman" + ); Ok(is_bootc_container) } @@ -723,7 +697,7 @@ fn set_up_first_boot_config( fn set_up_ssh_key_pair( spec: &mut oci_spec::runtime::Spec, custom_options: &CustomOptions, - env: RuntimeEnv, + engine: Engine, priv_dir_path: &Utf8Path, is_first_create: bool, ) -> Result { @@ -741,7 +715,7 @@ fn set_up_ssh_key_pair( // - We're not running under Kubernetes (where there isn't a "host user"); and // - They have a key pair. 
let use_user_key_pair = !custom_options.random_ssh_key_pair - && env == RuntimeEnv::Other + && engine == Engine::Podman && user_ssh_dir.join("id_rsa.pub").is_file() && user_ssh_dir.join("id_rsa").is_file(); diff --git a/src/commands/create/runtime_env.rs b/src/commands/create/runtime_env.rs deleted file mode 100644 index 727802b..0000000 --- a/src/commands/create/runtime_env.rs +++ /dev/null @@ -1,51 +0,0 @@ -// SPDX-License-Identifier: GPL-2.0-or-later - -use std::fs; - -use anyhow::Result; -use camino::Utf8Path; - -#[derive(Clone, Copy, Debug, Eq, PartialEq)] -pub enum RuntimeEnv { - Docker, - Kubernetes, - Other, -} - -impl RuntimeEnv { - pub fn current( - spec: &oci_spec::runtime::Spec, - original_root_path: impl AsRef, - ) -> Result { - let has_kubernetes_secrets_dir = spec.mounts().iter().flatten().any(|m| { - m.destination() - .starts_with("/var/run/secrets/kubernetes.io") - }); - - let has_kubernetes_managed_etc_hosts = spec - .mounts() - .iter() - .flatten() - .filter(|m| m.destination() == Utf8Path::new("/etc/hosts")) - .flat_map(|m| m.source()) - .next() - .map(fs::read_to_string) - .transpose()? 
- .and_then(|hosts| hosts.lines().next().map(|line| line.to_string())) - .map(|line| line.contains("Kubernetes-managed hosts file")) - .unwrap_or(false); - - let has_dockerenv_dot_file = original_root_path - .as_ref() - .join(".dockerenv") - .try_exists()?; - - if has_kubernetes_secrets_dir || has_kubernetes_managed_etc_hosts { - Ok(RuntimeEnv::Kubernetes) - } else if has_dockerenv_dot_file { - Ok(RuntimeEnv::Docker) - } else { - Ok(RuntimeEnv::Other) - } - } -} From c0888c6ffbf97a6ae7bdc5480641da3ce9e678ea Mon Sep 17 00:00:00 2001 From: Alberto Faria Date: Fri, 26 Apr 2024 16:38:17 +0100 Subject: [PATCH 5/7] Extend bootc container support to Docker Signed-off-by: Alberto Faria --- embed/bootc/entrypoint.sh | 8 ++++++++ embed/bootc/prepare.sh | 33 ++++++++++++++++----------------- src/commands/create/engine.rs | 8 ++++++++ src/commands/create/mod.rs | 7 ++++--- 4 files changed, 36 insertions(+), 20 deletions(-) diff --git a/embed/bootc/entrypoint.sh b/embed/bootc/entrypoint.sh index 77983ba..3d7bfc6 100644 --- a/embed/bootc/entrypoint.sh +++ b/embed/bootc/entrypoint.sh @@ -35,6 +35,14 @@ fi # build disk image using bootc-install +# TODO: `bootc install to-disk` currently fails when using docker-archive. Fix +# the underlying issue to avoid this skopeo-copy command. 
+skopeo copy --quiet \ + docker-archive:/output/image.docker-archive \ + oci-archive:/output/image.oci-archive + +rm /output/image.docker-archive + PATH=/output/bin:$PATH bootc install to-disk \ --source-imgref oci-archive:/output/image.oci-archive \ --target-imgref "$image_name" \ diff --git a/embed/bootc/prepare.sh b/embed/bootc/prepare.sh index c2a4df2..1d1be71 100644 --- a/embed/bootc/prepare.sh +++ b/embed/bootc/prepare.sh @@ -3,9 +3,10 @@ set -o errexit -o pipefail -o nounset -original_root=$1 -priv_dir=$2 -container_id=$3 +engine=$1 +container_id=$2 +original_root=$3 +priv_dir=$4 __step() { printf "\033[36m%s\033[0m\n" "$*" @@ -21,12 +22,14 @@ exec > "$bootc_dir/progress" 2>&1 # get info about the container *image* image_info=$( - podman container inspect \ - --format '{{.ImageName}}\t{{.Image}}' \ + "$engine" container inspect \ + --format '{{.Config.Image}}'$'\t''{{.Image}}' \ "$container_id" ) image_name=$( cut -f1 <<< "$image_info" ) +# image_name=${image_name#sha256:} + image_id=$( cut -f2 <<< "$image_info" ) # check if VM image is cached @@ -34,7 +37,7 @@ image_id=$( cut -f2 <<< "$image_info" ) container_name=crun-vm-$container_id cache_image_label=crun-vm.from=$image_id -cache_image_id=$( podman images --filter "label=$cache_image_label" --format '{{.ID}}' --no-trunc ) +cache_image_id=$( "$engine" images --filter "label=$cache_image_label" --format '{{.ID}}' --no-trunc ) if [[ -n "$cache_image_id" ]]; then @@ -42,11 +45,11 @@ if [[ -n "$cache_image_id" ]]; then __step "Retrieving cached VM image..." 
- trap 'podman rm --force "$container_name" >/dev/null 2>&1 || true' EXIT + trap '"$engine" rm --force "$container_name" >/dev/null 2>&1 || true' EXIT - podman create --quiet --name "$container_name" "$cache_image_id" /dev/null - podman export "$container_name" | tar -C "$bootc_dir" -x image.qcow2 - podman rm "$container_name" >/dev/null 2>&1 + "$engine" create --quiet --name "$container_name" "$cache_image_id" /dev/null + "$engine" export "$container_name" | tar -C "$bootc_dir" -x image.qcow2 + "$engine" rm "$container_name" >/dev/null 2>&1 trap '' EXIT @@ -54,15 +57,11 @@ else __step "Converting $image_name into a VM image..." - # save container *image* as an OCI archive + # save container *image* as an archive echo -n 'Preparing container image...' - podman save \ - --format oci-archive \ - --output "$bootc_dir/image.oci-archive" \ - "$image_id" \ - &1 \ + "$engine" save --output "$bootc_dir/image.docker-archive" "$image_id" &1 \ | sed -u 's/.*/./' \ | stdbuf -o0 tr -d '\n' @@ -98,7 +97,7 @@ else __step "Caching VM image as a containerdisk..." 
id=$( - podman build --quiet --file - --label "$cache_image_label" "$bootc_dir" <<-'EOF' + "$engine" build --quiet --file - --label "$cache_image_label" "$bootc_dir" <<-'EOF' FROM scratch COPY image.qcow2 / ENTRYPOINT ["no-entrypoint"] diff --git a/src/commands/create/engine.rs b/src/commands/create/engine.rs index a004a3b..a989d53 100644 --- a/src/commands/create/engine.rs +++ b/src/commands/create/engine.rs @@ -16,6 +16,14 @@ pub enum Engine { } impl Engine { + pub fn command(self) -> Option<&'static str> { + match self { + Engine::Podman => Some("podman"), + Engine::Docker => Some("docker"), + Engine::Kubernetes => None, + } + } + pub fn detect( container_id: &str, bundle_path: &Utf8Path, diff --git a/src/commands/create/mod.rs b/src/commands/create/mod.rs index fd5c6d6..1b04885 100644 --- a/src/commands/create/mod.rs +++ b/src/commands/create/mod.rs @@ -117,9 +117,10 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu fs::create_dir_all(&bootc_dir)?; std::process::Command::new(bootc_dir.join("prepare.sh")) + .arg(engine.command().unwrap()) + .arg(&args.container_id) .arg(&original_root_path) .arg(&priv_dir_path) - .arg(&args.container_id) .stdin(Stdio::null()) .stdout(Stdio::null()) .stderr(Stdio::null()) @@ -155,8 +156,8 @@ fn is_bootc_container(original_root_path: &Utf8Path, engine: Engine) -> Result Date: Sun, 28 Apr 2024 13:46:16 +0100 Subject: [PATCH 6/7] Add --bootc-disk-size option It allows setting the disk size of the VM image that is generated from a bootc container image. Also improve the default disk size by basing it on the container image size.
Signed-off-by: Alberto Faria --- docs/2-podman-docker.md | 4 +++ embed/bootc/prepare.sh | 38 ++++++++++++++++++++++----- src/commands/create/custom_opts.rs | 3 +++ src/commands/create/mod.rs | 41 +++++++++++++++++++++++++----- tests/t/bootc-disk-size.sh | 16 ++++++++++++ 5 files changed, 89 insertions(+), 13 deletions(-) create mode 100644 tests/t/bootc-disk-size.sh diff --git a/docs/2-podman-docker.md b/docs/2-podman-docker.md index 4608bfe..afb8e59 100644 --- a/docs/2-podman-docker.md +++ b/docs/2-podman-docker.md @@ -111,6 +111,10 @@ $ podman run \ Internally, crun-vm generates a VM image from the bootable container and then boots it. +By default, the VM image is given a disk size roughly double the size of the +bootc container image. To change this, use the `--bootc-disk-size <size>[KMGT]` +option. + ## First-boot customization ### cloud-init diff --git a/embed/bootc/prepare.sh b/embed/bootc/prepare.sh index 1d1be71..ea5afb3 100644 --- a/embed/bootc/prepare.sh +++ b/embed/bootc/prepare.sh @@ -7,6 +7,7 @@ engine=$1 container_id=$2 original_root=$3 priv_dir=$4 +disk_size=$5 __step() { printf "\033[36m%s\033[0m\n" "$*" @@ -32,12 +33,38 @@ image_name=$( cut -f1 <<< "$image_info" ) image_id=$( cut -f2 <<< "$image_info" ) +# determine disk size + +if [[ -z "$disk_size" ]]; then + container_image_size=$( + "$engine" image inspect --format '{{.VirtualSize}}' "$image_id" + ) + + # use double the container image size to allow for in-place updates + disk_size=$(( container_image_size * 2 )) + + # round up to 1 MiB + alignment=$(( 2**20 )) + disk_size=$(( (disk_size + alignment - 1) / alignment * alignment )) +fi + +truncate --size "$disk_size" "$bootc_dir/image.raw" +disk_size=$( stat --format %s "$bootc_dir/image.raw" ) + # check if VM image is cached container_name=crun-vm-$container_id -cache_image_label=crun-vm.from=$image_id -cache_image_id=$( "$engine" images --filter "label=$cache_image_label" --format '{{.ID}}' --no-trunc ) +cache_image_labels=( +
"crun-vm.from=$image_id" + "crun-vm.size=$disk_size" +) + +cache_image_id=$( + "$engine" images \ + "${cache_image_labels[@]/#/--filter=label=}" \ + --format '{{.ID}}' --no-trunc + ) if [[ -n "$cache_image_id" ]]; then @@ -79,8 +106,6 @@ else # run bootc-install under krun - truncate --size 10G "$bootc_dir/image.raw" # TODO: allow adjusting disk size - trap 'krun delete --force "$container_name" >/dev/null 2>&1 || true' EXIT krun run --config "$bootc_dir/config.json" "$container_name" , + #[clap(long)] pub cloud_init: Option, diff --git a/src/commands/create/mod.rs b/src/commands/create/mod.rs index 1b04885..a9a4e55 100644 --- a/src/commands/create/mod.rs +++ b/src/commands/create/mod.rs @@ -14,7 +14,9 @@ use std::process::{Command, Stdio}; use anyhow::{anyhow, bail, ensure, Context, Result}; use camino::{Utf8Path, Utf8PathBuf}; +use lazy_static::lazy_static; use nix::sys::stat::{major, makedev, minor, mknod, Mode, SFlag}; +use regex::Regex; use rust_embed::RustEmbed; use crate::commands::create::custom_opts::CustomOptions; @@ -38,12 +40,7 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu let engine = Engine::detect(&args.container_id, bundle_path, &spec, &original_root_path)?; let custom_options = CustomOptions::from_spec(&spec, engine)?; - let is_bootc_container = is_bootc_container(&original_root_path, engine)?; - - ensure!( - !is_bootc_container || !custom_options.emulated, - "--emulated is incompatible with bootable containers" - ); + let is_bootc_container = is_bootc_container(&original_root_path, &custom_options, engine)?; // We include container_id in our paths to ensure no overlap with the user container's contents. 
let priv_dir_path = original_root_path.join(format!("crun-vm-{}", args.container_id)); @@ -121,6 +118,7 @@ pub fn create(args: &liboci_cli::Create, raw_args: &[impl AsRef]) -> Resu .arg(&args.container_id) .arg(&original_root_path) .arg(&priv_dir_path) + .arg(custom_options.bootc_disk_size.unwrap_or_default()) .stdin(Stdio::null()) .stdout(Stdio::null()) .stderr(Stdio::null()) @@ -152,7 +150,11 @@ fn ensure_unprivileged(spec: &oci_spec::runtime::Spec) -> Result<()> { Ok(()) } -fn is_bootc_container(original_root_path: &Utf8Path, engine: Engine) -> Result { +fn is_bootc_container( + original_root_path: &Utf8Path, + custom_options: &CustomOptions, + engine: Engine, +) -> Result { let is_bootc_container = original_root_path.join("usr/lib/bootc/install").is_dir(); ensure!( @@ -160,6 +162,31 @@ fn is_bootc_container(original_root_path: &Utf8Path, engine: Engine) -> Result Date: Sun, 28 Apr 2024 15:31:42 +0100 Subject: [PATCH 7/7] tests/env.sh: Expose TEST_ID variable to tests Its value is a random identifier unique to the current run of the current test. 
Signed-off-by: Alberto Faria --- tests/env.sh | 1 + tests/t/bootc-disk-size.sh | 10 +++++----- tests/t/cloud-init.sh | 8 ++++---- tests/t/emulated.sh | 4 ++-- tests/t/hostname.sh | 16 ++++++++-------- tests/t/ignition.sh | 6 +++--- tests/t/mount.sh | 14 +++++++------- tests/t/persistent.sh | 8 ++++---- tests/t/publish.sh | 12 ++++++------ tests/t/random-ssh-key-pair.sh | 8 ++++---- tests/t/stop-start.sh | 10 +++++----- 11 files changed, 49 insertions(+), 48 deletions(-) diff --git a/tests/env.sh b/tests/env.sh index 7400856..2172101 100755 --- a/tests/env.sh +++ b/tests/env.sh @@ -392,6 +392,7 @@ run) } TEMP_DIR=~/$label.temp UTIL_DIR=~/$label.util + TEST_ID=$label ENGINE=$engine export RUST_BACKTRACE=1 RUST_LIB_BACKTRACE=1 $( cat "$t" )\ diff --git a/tests/t/bootc-disk-size.sh b/tests/t/bootc-disk-size.sh index 64fa602..6d31461 100644 --- a/tests/t/bootc-disk-size.sh +++ b/tests/t/bootc-disk-size.sh @@ -4,13 +4,13 @@ image="${TEST_IMAGES[fedora-bootc]}" user="${TEST_IMAGES_DEFAULT_USER[fedora-bootc]}" __run() { - __engine run --detach --name bootc-disk-size "$image" --bootc-disk-size "$1" + __engine run --detach --name "$TEST_ID" "$image" --bootc-disk-size "$1" } __run 1M -! __engine exec bootc-disk-size --as "$user" -__engine rm --force bootc-disk-size +! 
__engine exec "$TEST_ID" --as "$user" +__engine rm --force "$TEST_ID" __run 4G -__engine exec bootc-disk-size --as "$user" -__engine rm --force bootc-disk-size +__engine exec "$TEST_ID" --as "$user" +__engine rm --force "$TEST_ID" diff --git a/tests/t/cloud-init.sh b/tests/t/cloud-init.sh index 5ac77e1..69575d2 100644 --- a/tests/t/cloud-init.sh +++ b/tests/t/cloud-init.sh @@ -19,18 +19,18 @@ EOF __engine run \ --rm --detach \ - --name cloud-init \ + --name "$TEST_ID" \ "$image" \ --cloud-init "$TEMP_DIR" __test() { - __engine exec cloud-init --as "$user" "cmp $home/file <<< hello" + __engine exec "$TEST_ID" --as "$user" "cmp $home/file <<< hello" } __test - __engine restart cloud-init + __engine restart "$TEST_ID" __test - __engine stop cloud-init + __engine stop "$TEST_ID" done diff --git a/tests/t/emulated.sh b/tests/t/emulated.sh index 0b6675a..7cffb4b 100644 --- a/tests/t/emulated.sh +++ b/tests/t/emulated.sh @@ -1,4 +1,4 @@ # SPDX-License-Identifier: GPL-2.0-or-later -__engine run --detach --name emulated "${TEST_IMAGES[fedora]}" --emulated -__engine exec emulated --as fedora +__engine run --detach --name "$TEST_ID" "${TEST_IMAGES[fedora]}" --emulated +__engine exec "$TEST_ID" --as fedora diff --git a/tests/t/hostname.sh b/tests/t/hostname.sh index c185f76..9879af2 100644 --- a/tests/t/hostname.sh +++ b/tests/t/hostname.sh @@ -7,36 +7,36 @@ for os in "${!TEST_IMAGES[@]}"; do # default hostname - id=$( __engine run --rm --detach --name "hostname-$os-default" "$image" ) + id=$( __engine run --rm --detach --name "$TEST_ID-$os-default" "$image" ) __test() { - __engine exec "hostname-$os-default" --as "$user" \ + __engine exec "$TEST_ID-$os-default" --as "$user" \ "set -x && [[ \$( hostname ) == ${id::12} ]]" } __test - __engine restart "hostname-$os-default" + __engine restart "$TEST_ID-$os-default" __test - __engine stop --time 0 "hostname-$os-default" + __engine stop --time 0 "$TEST_ID-$os-default" # custom hostname __engine run \ --rm --detach \ - --name 
"hostname-$os-custom" \ + --name "$TEST_ID-$os-custom" \ --hostname my-test-vm \ "$image" __test() { - __engine exec "hostname-$os-custom" --as "$user" \ + __engine exec "$TEST_ID-$os-custom" --as "$user" \ "set -x && [[ \$( hostname ) == my-test-vm ]]" } __test - __engine restart "hostname-$os-custom" + __engine restart "$TEST_ID-$os-custom" __test - __engine stop --time 0 "hostname-$os-custom" + __engine stop --time 0 "$TEST_ID-$os-custom" done diff --git a/tests/t/ignition.sh b/tests/t/ignition.sh index d2cab70..e2cfb9c 100644 --- a/tests/t/ignition.sh +++ b/tests/t/ignition.sh @@ -26,14 +26,14 @@ EOF __engine run \ --rm --detach \ - --name ignition \ + --name "$TEST_ID" \ "$image" \ --ignition "$TEMP_DIR/config.ign" __test() { - __engine exec ignition --as "$user" "cmp $home/file <<< hello" + __engine exec "$TEST_ID" --as "$user" "cmp $home/file <<< hello" } __test -__engine restart ignition +__engine restart "$TEST_ID" __test diff --git a/tests/t/mount.sh b/tests/t/mount.sh index 4be1085..726b596 100644 --- a/tests/t/mount.sh +++ b/tests/t/mount.sh @@ -10,37 +10,37 @@ for os in "${!TEST_IMAGES[@]}"; do __engine run \ --rm --detach \ - --name "mount-$os" \ + --name "$TEST_ID-$os" \ --volume "$TEMP_DIR/file:$home/file:z" \ --volume "$TEMP_DIR:$home/dir:z" \ --mount "type=tmpfs,dst=$home/tmp" \ "$image" __test() { - __engine exec "mount-$os" --as "$user" + __engine exec "$TEST_ID-$os" --as "$user" - __engine exec "mount-$os" --as "$user" " + __engine exec "$TEST_ID-$os" --as "$user" " set -e [[ -b $home/file ]] sudo cmp -n 6 $home/file <<< hello " - __engine exec "mount-$os" --as "$user" " + __engine exec "$TEST_ID-$os" --as "$user" " set -e mount -l | grep '^virtiofs-0 on $home/dir type virtiofs' [[ -d $home/dir ]] sudo cmp $home/dir/file <<< hello " - __engine exec "mount-$os" --as "$user" " + __engine exec "$TEST_ID-$os" --as "$user" " mount -l | grep '^tmpfs on $home/tmp type tmpfs' " } __test - __engine restart "mount-$os" + __engine restart "$TEST_ID-$os" 
__test - __engine stop --time 0 "mount-$os" + __engine stop --time 0 "$TEST_ID-$os" done diff --git a/tests/t/persistent.sh b/tests/t/persistent.sh index 0f8c266..e850031 100644 --- a/tests/t/persistent.sh +++ b/tests/t/persistent.sh @@ -9,14 +9,14 @@ fi # Usage: __run [] __run() { - __engine run --rm --detach --name persistent "${@:2}" --rootfs "$TEMP_DIR" "$1" + __engine run --rm --detach --name "$TEST_ID" "${@:2}" --rootfs "$TEMP_DIR" "$1" } # Usage: __test __test() { id=$( __run "$1" ) - __engine exec persistent --as fedora "$2" - __engine stop persistent + __engine exec "$TEST_ID" --as fedora "$2" + __engine stop "$TEST_ID" if [[ "$ENGINE" != rootful-podman ]]; then # ensure user that invoked `engine run` can delete crun-vm state @@ -34,4 +34,4 @@ __test "" '[[ -e i-was-here ]]' ! RUST_LIB_BACKTRACE=0 __run --persistent --read-only __run "" --read-only -__engine exec persistent --as fedora +__engine exec "$TEST_ID" --as fedora diff --git a/tests/t/publish.sh b/tests/t/publish.sh index 45ef70a..07213d9 100644 --- a/tests/t/publish.sh +++ b/tests/t/publish.sh @@ -1,22 +1,22 @@ # SPDX-License-Identifier: GPL-2.0-or-later -trap '__engine stop publish' EXIT +trap '__engine stop "$TEST_ID"' EXIT for os in fedora fedora-bootc; do image="${TEST_IMAGES[$os]}" user="${TEST_IMAGES_DEFAULT_USER[$os]}" - __engine run --rm --detach --name publish --publish 127.0.0.1::8000 "$image" + __engine run --rm --detach --name "$TEST_ID" --publish 127.0.0.1::8000 "$image" - endpoint=$( __engine port publish | tee /dev/stderr | cut -d' ' -f3 ) + endpoint=$( __engine port "$TEST_ID" | tee /dev/stderr | cut -d' ' -f3 ) - __engine exec publish --as "$user" + __engine exec "$TEST_ID" --as "$user" __log 'Ensuring curl fails...' ! curl "$endpoint" 2>/dev/null - __engine exec publish --as "$user" python -m http.server & + __engine exec "$TEST_ID" --as "$user" python -m http.server & __log 'Ensuring curl succeeds...' 
@@ -28,6 +28,6 @@ for os in fedora fedora-bootc; do sleep 1 done - __engine stop publish + __engine stop "$TEST_ID" done diff --git a/tests/t/random-ssh-key-pair.sh b/tests/t/random-ssh-key-pair.sh index fcd7afd..3fe4c1d 100644 --- a/tests/t/random-ssh-key-pair.sh +++ b/tests/t/random-ssh-key-pair.sh @@ -2,10 +2,10 @@ __engine run \ --detach \ - --name random-ssh-key-pair \ + --name "$TEST_ID" \ "${TEST_IMAGES[fedora]}" \ --random-ssh-key-pair -__engine exec random-ssh-key-pair --as fedora -__engine restart random-ssh-key-pair -__engine exec random-ssh-key-pair --as fedora +__engine exec "$TEST_ID" --as fedora +__engine restart "$TEST_ID" +__engine exec "$TEST_ID" --as fedora diff --git a/tests/t/stop-start.sh b/tests/t/stop-start.sh index bdce535..d0e47fb 100644 --- a/tests/t/stop-start.sh +++ b/tests/t/stop-start.sh @@ -1,14 +1,14 @@ # SPDX-License-Identifier: GPL-2.0-or-later -__engine run --detach --name stop-start "${TEST_IMAGES[fedora]}" +__engine run --detach --name "$TEST_ID" "${TEST_IMAGES[fedora]}" -__engine exec stop-start --as fedora '[[ ! -e i-was-here ]] && touch i-was-here' +__engine exec "$TEST_ID" --as fedora '[[ ! -e i-was-here ]] && touch i-was-here' for (( i = 0; i < 2; ++i )); do - __engine stop stop-start - __engine start stop-start + __engine stop "$TEST_ID" + __engine start "$TEST_ID" - __engine exec stop-start --as fedora '[[ -e i-was-here ]]' + __engine exec "$TEST_ID" --as fedora '[[ -e i-was-here ]]' done