diff --git a/lib/src/boundimage.rs b/lib/src/boundimage.rs index bc78d538a..0e85b36d7 100644 --- a/lib/src/boundimage.rs +++ b/lib/src/boundimage.rs @@ -5,8 +5,6 @@ //! pre-pulled (and in the future, pinned) before a new image root //! is considered ready. -use std::num::NonZeroUsize; - use anyhow::{Context, Result}; use camino::Utf8Path; use cap_std_ext::cap_std::fs::Dir; @@ -49,7 +47,7 @@ pub(crate) async fn pull_bound_images(sysroot: &Storage, deployment: &Deployment #[context("Querying bound images")] pub(crate) fn query_bound_images_for_deployment( - sysroot: &Storage, + sysroot: &ostree_ext::ostree::Sysroot, deployment: &Deployment, ) -> Result<Vec<BoundImage>> { let deployment_root = &crate::utils::deployment_fd(sysroot, deployment)?; @@ -153,15 +151,21 @@ pub(crate) async fn pull_images( sysroot: &Storage, bound_images: Vec<crate::boundimage::BoundImage>, ) -> Result<()> { - tracing::debug!("Pulling bound images: {}", bound_images.len()); - // Yes, the usage of NonZeroUsize here is...maybe odd looking, but I find - // it an elegant way to divide (empty vector, non empty vector) since - // we want to print the length too below. - let Some(n) = NonZeroUsize::new(bound_images.len()) else { - return Ok(()); - }; // Only do work like initializing the image storage if we have images to pull. 
+ if bound_images.is_empty() { + return Ok(()); + } let imgstore = sysroot.get_ensure_imgstore()?; + pull_images_impl(imgstore, bound_images).await +} + +#[context("Pulling bound images")] +pub(crate) async fn pull_images_impl( + imgstore: &crate::imgstorage::Storage, + bound_images: Vec<crate::boundimage::BoundImage>, +) -> Result<()> { + let n = bound_images.len(); + tracing::debug!("Pulling bound images: {n}"); // TODO: do this in parallel for bound_image in bound_images { let image = &bound_image.image; diff --git a/lib/src/cli.rs b/lib/src/cli.rs index 84eb58b6e..a7b95f972 100644 --- a/lib/src/cli.rs +++ b/lib/src/cli.rs @@ -182,6 +182,15 @@ pub(crate) enum InstallOpts { /// will be wiped, but the content of the existing root will otherwise be retained, and will /// need to be cleaned up if desired when rebooted into the new root. ToExistingRoot(crate::install::InstallToExistingRootOpts), + /// Intended for use in environments that are performing an ostree-based installation, not bootc. + /// + /// In this scenario the installation may be missing bootc specific features such as + /// kernel arguments, logically bound images and more. This command can be used to attempt + /// to reconcile. At the current time, the only tested environment is Anaconda using `ostreecontainer` + /// and it is recommended to avoid usage outside of that environment. Instead, ensure your + /// code is using `bootc install to-filesystem` from the start. + #[clap(hide = true)] + EnsureCompletion {}, /// Output JSON to stdout that contains the merged installation configuration /// as it may be relevant to calling processes using `install to-filesystem` /// that in particular want to discover the desired root filesystem type from the container image. @@ -346,6 +355,14 @@ pub(crate) enum InternalsOpts { #[clap(allow_hyphen_values = true)] args: Vec<OsString>, }, + /// Invoked from ostree-ext to complete an installation. 
+ BootcInstallCompletion { + /// Path to the sysroot + sysroot: Utf8PathBuf, + + /// The stateroot + stateroot: String, + }, } #[derive(Debug, clap::Subcommand, PartialEq, Eq)] @@ -989,6 +1006,10 @@ async fn run_from_opt(opt: Opt) -> Result<()> { crate::install::install_to_existing_root(opts).await } InstallOpts::PrintConfiguration => crate::install::print_configuration(), + InstallOpts::EnsureCompletion {} => { + let rootfs = &Dir::open_ambient_dir("/", cap_std::ambient_authority())?; + crate::install::completion::run_from_anaconda(rootfs).await + } }, #[cfg(feature = "install")] Opt::ExecInHostMountNamespace { args } => { @@ -1026,6 +1047,10 @@ async fn run_from_opt(opt: Opt) -> Result<()> { let sysroot = get_storage().await?; crate::deploy::cleanup(&sysroot).await } + InternalsOpts::BootcInstallCompletion { sysroot, stateroot } => { + let rootfs = &Dir::open_ambient_dir("/", cap_std::ambient_authority())?; + crate::install::completion::run_from_ostree(rootfs, &sysroot, &stateroot).await + } }, #[cfg(feature = "docgen")] Opt::Man(manopts) => crate::docgen::generate_manpages(&manopts.directory), diff --git a/lib/src/install.rs b/lib/src/install.rs index 845379a63..c376991ac 100644 --- a/lib/src/install.rs +++ b/lib/src/install.rs @@ -7,6 +7,7 @@ // This sub-module is the "basic" installer that handles creating basic block device // and filesystem setup. pub(crate) mod baseline; +pub(crate) mod completion; pub(crate) mod config; mod osbuild; pub(crate) mod osconfig; @@ -762,6 +763,7 @@ async fn install_container( )?; let kargsd = kargsd.iter().map(|s| s.as_str()); + // Keep this in sync with install/completion.rs for the Anaconda fixups let install_config_kargs = state .install_config .as_ref() @@ -786,6 +788,7 @@ async fn install_container( options.kargs = Some(kargs.as_slice()); options.target_imgref = Some(&state.target_imgref); options.proxy_cfg = proxy_cfg; + options.skip_completion = true; // Must be set to avoid recursion! 
options.no_clean = has_ostree; let imgstate = crate::utils::async_task_with_spinner( "Deploying container image", @@ -1383,7 +1386,7 @@ async fn install_with_sysroot( } } BoundImages::Unresolved(bound_images) => { - crate::boundimage::pull_images(sysroot, bound_images) + crate::boundimage::pull_images_impl(imgstore, bound_images) .await .context("pulling bound images")?; } diff --git a/lib/src/install/completion.rs b/lib/src/install/completion.rs new file mode 100644 index 000000000..1b318beb8 --- /dev/null +++ b/lib/src/install/completion.rs @@ -0,0 +1,298 @@ +//! This module handles finishing/completion after an ostree-based +//! install from e.g. Anaconda. + +use std::io; +use std::os::fd::AsFd; +use std::process::Command; + +use anyhow::{Context, Result}; +use bootc_utils::CommandRunExt; +use camino::Utf8Path; +use cap_std_ext::{cap_std::fs::Dir, dirext::CapStdExtDirExt}; +use fn_error_context::context; +use ostree_ext::{gio, ostree}; +use rustix::fs::Mode; +use rustix::fs::OFlags; + +use super::config; +use crate::utils::medium_visibility_warning; + +/// An environment variable set by anaconda that hints +/// we are running as part of that environment. 
+const ANACONDA_ENV_HINT: &str = "ANA_INSTALL_PATH"; +/// Global flag to signal we're in a booted ostree system +const OSTREE_BOOTED: &str = "run/ostree-booted"; +/// The very well-known DNS resolution file +const RESOLVCONF: &str = "etc/resolv.conf"; +/// Temporary name the target root's original resolv.conf is renamed to +const RESOLVCONF_ORIG: &str = "etc/resolv.conf.bootc-original"; +/// The root filesystem for pid 1 +const PROC1_ROOT: &str = "proc/1/root"; +/// The cgroupfs mount point, which we may propagate from the host if needed +const CGROUPFS: &str = "sys/fs/cgroup"; +/// The path to the temporary global ostree pull secret +const RUN_OSTREE_AUTH: &str = "run/ostree/auth.json"; +/// A sub path of /run which is used to ensure idempotency +pub(crate) const RUN_BOOTC_INSTALL_RECONCILED: &str = "run/bootc-install-reconciled"; + +/// Assuming that the current root is an ostree deployment, pull kargs +/// from it and inject them. +fn reconcile_kargs(sysroot: &ostree::Sysroot, deployment: &ostree::Deployment) -> Result<()> { + let deployment_root = &crate::utils::deployment_fd(sysroot, deployment)?; + let cancellable = gio::Cancellable::NONE; + + let current_kargs = deployment + .bootconfig() + .expect("bootconfig for deployment") + .get("options"); + let current_kargs = current_kargs + .as_ref() + .map(|s| s.as_str()) + .unwrap_or_default(); + tracing::debug!("current_kargs={current_kargs}"); + let current_kargs = ostree::KernelArgs::from_string(&current_kargs); + + // Keep this in sync with install_container + let install_config = config::load_config()?; + let install_config_kargs = install_config + .as_ref() + .and_then(|c| c.kargs.as_ref()) + .into_iter() + .flatten() + .map(|s| s.as_str()) + .collect::<Vec<_>>(); + let kargsd = crate::kargs::get_kargs_in_root(deployment_root, std::env::consts::ARCH)?; + let kargsd = kargsd.iter().map(|s| s.as_str()).collect::<Vec<_>>(); + + current_kargs.append_argv(&install_config_kargs); + current_kargs.append_argv(&kargsd); + let new_kargs = current_kargs.to_string(); +
tracing::debug!("new_kargs={new_kargs}"); + + sysroot.deployment_set_kargs_in_place(deployment, Some(&new_kargs), cancellable)?; + Ok(()) +} + +/// A little helper struct which on drop renames a file. Used for putting back /etc/resolv.conf. +#[must_use] +struct Renamer<'d> { + dir: &'d Dir, + from: &'static Utf8Path, + to: &'static Utf8Path, +} + +impl<'d> Renamer<'d> { + fn _impl_drop(&mut self) -> Result<()> { + self.dir + .rename(self.from, self.dir, self.to) + .map_err(Into::into) + } + + fn consume(mut self) -> Result<()> { + self._impl_drop() + } +} + +impl<'d> Drop for Renamer<'d> { + fn drop(&mut self) { + let _ = self._impl_drop(); + } +} +/// Work around https://github.com/containers/buildah/issues/4242#issuecomment-2492480586 +/// among other things. We unconditionally replace the contents of `/etc/resolv.conf` +/// in the target root with whatever the host uses (in Fedora 41+, that's systemd-resolved for Anaconda). +#[context("Copying host resolv.conf")] +fn ensure_resolvconf<'d>(rootfs: &'d Dir, proc1_root: &Dir) -> Result<Option<Renamer<'d>>> { + // Now check the state of etc/resolv.conf in the target root + let meta = rootfs + .symlink_metadata_optional(RESOLVCONF) + .context("stat")?; + let renamer = if meta.is_some() { + rootfs + .rename(RESOLVCONF, &rootfs, RESOLVCONF_ORIG) + .context("Renaming")?; + Some(Renamer { + dir: &rootfs, + from: RESOLVCONF_ORIG.into(), + to: RESOLVCONF.into(), + }) + } else { + None + }; + // If we got here, /etc/resolv.conf either didn't exist or we renamed it out of the way. + // Copy the host data into it (note this will follow symlinks; e.g. 
+ // Anaconda in Fedora 41+ defaults to systemd-resolved) + proc1_root + .copy(RESOLVCONF, rootfs, RESOLVCONF) + .context("Copying new resolv.conf")?; + Ok(renamer) +} + +/// Bind a mount point from the host namespace into our root +fn bind_from_host( + rootfs: &Dir, + src: impl AsRef<Utf8Path>, + target: impl AsRef<Utf8Path>, +) -> Result<()> { + fn bind_from_host_impl(rootfs: &Dir, src: &Utf8Path, target: &Utf8Path) -> Result<()> { + rootfs.create_dir_all(target)?; + if rootfs.is_mountpoint(target)?.unwrap_or_default() { + return Ok(()); + } + let target = format!("/mnt/sysroot/{target}"); + tracing::debug!("Binding {src} to {target}"); + // We're run in a mount namespace, but not a pid namespace; use nsenter + // via the pid namespace to escape to the host's mount namespace and + // perform a mount there. + Command::new("nsenter") + .args(["-m", "-t", "1", "--", "mount", "--bind"]) + .arg(src) + .arg(&target) + .run()?; + Ok(()) + } + + bind_from_host_impl(rootfs, src.as_ref(), target.as_ref()) +} + +/// Anaconda doesn't mount /sys/fs/cgroup in /mnt/sysroot +#[context("Ensuring cgroupfs")] +fn ensure_cgroupfs(rootfs: &Dir) -> Result<()> { + bind_from_host(rootfs, CGROUPFS, CGROUPFS) +} + +/// If we have /etc/ostree/auth.json in the Anaconda environment then propagate +/// it into /run/ostree/auth.json +#[context("Propagating ostree auth")] +fn ensure_ostree_auth(rootfs: &Dir, host_root: &Dir) -> Result<()> { + let Some((authpath, authfd)) = + ostree_ext::globals::get_global_authfile(&host_root).context("Querying authfiles")? 
+ else { + tracing::debug!("No auth found in host"); + return Ok(()); + }; + tracing::debug!("Discovered auth in host: {authpath}"); + let mut authfd = io::BufReader::new(authfd); + let run_ostree_auth = Utf8Path::new(RUN_OSTREE_AUTH); + rootfs.create_dir_all(run_ostree_auth.parent().unwrap())?; + rootfs.atomic_replace_with(run_ostree_auth, |w| std::io::copy(&mut authfd, w))?; + Ok(()) +} + +#[context("Opening {PROC1_ROOT}")] +fn open_proc1_root(rootfs: &Dir) -> Result<Dir> { + let proc1_root = rustix::fs::openat( + &rootfs.as_fd(), + PROC1_ROOT, + OFlags::CLOEXEC | OFlags::DIRECTORY, + Mode::empty(), + )?; + Dir::reopen_dir(&proc1_root.as_fd()).map_err(Into::into) +} + +/// Core entrypoint invoked when we are likely being invoked from inside Anaconda as a `%post`. +pub(crate) async fn run_from_anaconda(rootfs: &Dir) -> Result<()> { + // unshare our mount namespace, so any *further* mounts aren't leaked. + // Note that because this does a re-exec, anything *before* this point + // should be idempotent. + crate::cli::ensure_self_unshared_mount_namespace()?; + + if std::env::var_os(ANACONDA_ENV_HINT).is_none() { + // Be loud if a user is invoking this outside of the expected setup. + medium_visibility_warning(&format!("Missing environment variable {ANACONDA_ENV_HINT}")); + } else { + // In the way Anaconda sets up the bind mounts today, this doesn't exist. Later + // code expects it to exist, so do so. + if !rootfs.try_exists(OSTREE_BOOTED)? { + tracing::debug!("Writing {OSTREE_BOOTED}"); + rootfs.atomic_write(OSTREE_BOOTED, b"")?; + } + } + + // Get access to the real root by opening /proc/1/root + let proc1_root = &open_proc1_root(rootfs)?; + + if proc1_root + .try_exists(RUN_BOOTC_INSTALL_RECONCILED) + .context("Querying reconciliation")? + { + println!("Reconciliation already completed."); + return Ok(()); + } + + ensure_cgroupfs(rootfs)?; + // Sometimes Anaconda may not initialize networking in the target root? 
+ let resolvconf = ensure_resolvconf(rootfs, proc1_root)?; + // Propagate an injected authfile for pulling logically bound images + ensure_ostree_auth(rootfs, proc1_root)?; + + let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path("/"))); + sysroot + .load(gio::Cancellable::NONE) + .context("Loading sysroot")?; + impl_completion(rootfs, &sysroot, None).await?; + + proc1_root + .write(RUN_BOOTC_INSTALL_RECONCILED, b"") + .with_context(|| format!("Writing {RUN_BOOTC_INSTALL_RECONCILED}"))?; + if let Some(resolvconf) = resolvconf { + resolvconf.consume()?; + } + Ok(()) +} + +/// From ostree-rs-ext, run through the rest of bootc install functionality +pub async fn run_from_ostree(rootfs: &Dir, sysroot: &Utf8Path, stateroot: &str) -> Result<()> { + // Load sysroot from the provided path + let sysroot = ostree::Sysroot::new(Some(&gio::File::for_path(sysroot))); + sysroot.load(gio::Cancellable::NONE)?; + + impl_completion(rootfs, &sysroot, Some(stateroot)).await?; + + // In this case we write the completion directly to /run as we're running from + // the host context. + rootfs + .write(RUN_BOOTC_INSTALL_RECONCILED, b"") + .with_context(|| format!("Writing {RUN_BOOTC_INSTALL_RECONCILED}"))?; + Ok(()) +} + +/// Core entrypoint for completion of an ostree-based install to a bootc one: +/// +/// - kernel argument handling +/// - logically bound images +/// +/// We could also do other things here, such as write an aleph file or +/// ensure the repo config is synchronized, but these two are the most important +/// for now. 
+pub(crate) async fn impl_completion( + rootfs: &Dir, + sysroot: &ostree::Sysroot, + stateroot: Option<&str>, +) -> Result<()> { + let deployment = &sysroot + .merge_deployment(stateroot) + .ok_or_else(|| anyhow::anyhow!("Failed to find deployment (stateroot={stateroot:?})"))?; + let sysroot_dir = Dir::reopen_dir(&crate::utils::sysroot_fd(&sysroot))?; + + // Create a subdir in /run + let rundir = "run/bootc-install"; + rootfs.create_dir_all(rundir)?; + let rundir = &rootfs.open_dir(rundir)?; + + // ostree-ext doesn't do kargs, so handle that now + reconcile_kargs(&sysroot, deployment)?; + + // ostree-ext doesn't do logically bound images + let bound_images = crate::boundimage::query_bound_images_for_deployment(sysroot, deployment)?; + if !bound_images.is_empty() { + // When we're run through ostree, we only lazily initialize the podman storage to avoid + // having a hard dependency on it. + let imgstorage = &crate::imgstorage::Storage::create(&sysroot_dir, &rundir)?; + crate::boundimage::pull_images_impl(imgstorage, bound_images) + .await + .context("pulling bound images")?; + } + + Ok(()) +} diff --git a/ostree-ext/src/container/deploy.rs b/ostree-ext/src/container/deploy.rs index 4d0ec1bf4..9d00fba28 100644 --- a/ostree-ext/src/container/deploy.rs +++ b/ostree-ext/src/container/deploy.rs @@ -1,9 +1,13 @@ //! Perform initial setup for a container image based system root use std::collections::HashSet; +use std::os::fd::BorrowedFd; +use std::process::Command; use anyhow::Result; +use cap_std_ext::cmdext::CapStdExtCommandExt; use fn_error_context::context; +use ocidir::cap_std::fs::Dir; use ostree::glib; use super::store::{gc_image_layers, LayeredImageState}; @@ -44,10 +48,19 @@ pub struct DeployOpts<'a> { /// it will not be necessary to remove the previous image. 
pub no_imgref: bool, + /// Do not invoke bootc completion + pub skip_completion: bool, + /// Do not cleanup deployments pub no_clean: bool, } +// Access the file descriptor for a sysroot +#[allow(unsafe_code)] +pub(crate) fn sysroot_fd(sysroot: &ostree::Sysroot) -> BorrowedFd { + unsafe { BorrowedFd::borrow_raw(sysroot.fd()) } +} + /// Write a container image to an OSTree deployment. /// /// This API is currently intended for only an initial deployment. @@ -58,6 +71,7 @@ pub async fn deploy( imgref: &OstreeImageReference, options: Option<DeployOpts<'_>>, ) -> Result<Box<LayeredImageState>> { + let sysroot_dir = &Dir::reopen_dir(&sysroot_fd(sysroot))?; let cancellable = ostree::gio::Cancellable::NONE; let options = options.unwrap_or_default(); let repo = &sysroot.repo(); @@ -122,6 +136,24 @@ pub async fn deploy( flags, cancellable, )?; + + // We end up re-executing ourselves as a subprocess because + // otherwise right now we end up with a circular dependency between + // crates. We need an option to skip though so when the *main* + // bootc install code calls this API, we don't do this as it + // will have already been handled. + if !options.skip_completion { + // Note that the sysroot is provided as `.` but we use cwd_dir to + // make the process current working directory the sysroot. + let st = Command::new("bootc") + .args(["internals", "bootc-install-completion", ".", stateroot]) + .cwd_dir(sysroot_dir.try_clone()?) + .status()?; + if !st.success() { + anyhow::bail!("Failed to complete bootc install"); + } + } + if !options.no_clean { sysroot.cleanup(cancellable)?; }