From 8fbaebac586cfb59a5bd8213839940d16667bcb4 Mon Sep 17 00:00:00 2001
From: Colin Walters
Date: Thu, 7 Dec 2023 10:21:05 -0500
Subject: [PATCH] prepare-root: Add support for root.transient

Closes: https://github.com/ostreedev/ostree/issues/3113

It'd greatly improve compatibility with things like RPMs that install
in `/opt` if we supported a full "original docker" style model where
`/` is a transient overlayfs. We'd still keep our semantics for `/etc`
and `/var` by default, but e.g. we'd stop recommending
`/opt` -> `/var/opt` in this model, so `/opt` would be on the overlayfs.

Note this all aligns with composefs, where we'd actually be making `/`
a *read-only* overlayfs by default; it'd be really nice of course to
*implement* this by just making the composefs overlayfs writable, but I
am not sure we can hard require composefs for this right now.

So this change adds support for `root.transient = true` in
`/usr/lib/ostree/prepare-root.conf`.

The major downside is that people could be surprised if files they
write to e.g. `/opt` don't persist across upgrades. But, again, that is
how things have worked ever since Docker started.

Note as part of the implementation of this, we need to add a whole new
"backing" directory distinct from the deployment directories.

(Tangentially related to this, it's tempting to switch to always using
a *read-only* overlay mount by default.)
---
 src/libostree/ostree-sysroot-cleanup.c |  7 ++++
 src/libostree/ostree-sysroot-deploy.c  | 50 ++++++++++++++++++++++++++
 src/libostree/ostree-sysroot-private.h |  2 ++
 src/libostree/ostree-sysroot.c         | 13 ++++++-
 src/libotcore/otcore.h                 |  7 ++++
 src/switchroot/ostree-prepare-root.c   | 39 ++++++++++++++++++--
 6 files changed, 115 insertions(+), 3 deletions(-)

diff --git a/src/libostree/ostree-sysroot-cleanup.c b/src/libostree/ostree-sysroot-cleanup.c
index ba9b3bacda..e5ffc5a754 100644
--- a/src/libostree/ostree-sysroot-cleanup.c
+++ b/src/libostree/ostree-sysroot-cleanup.c
@@ -218,6 +218,7 @@ gboolean
 _ostree_sysroot_rmrf_deployment (OstreeSysroot *self, OstreeDeployment *deployment,
                                  GCancellable *cancellable, GError **error)
 {
+  g_autofree char *backing_relpath = _ostree_sysroot_get_deployment_backing_relpath (deployment);
   g_autofree char *origin_relpath = ostree_deployment_get_origin_relpath (deployment);
   g_autofree char *deployment_path = ostree_sysroot_get_deployment_dirpath (self, deployment);
   struct stat stbuf;
@@ -238,6 +239,12 @@ _ostree_sysroot_rmrf_deployment (OstreeSysroot *self, OstreeDeployment *deployme
   /* This deployment wasn't referenced, so delete it */
   if (!_ostree_linuxfs_fd_alter_immutable_flag (deployment_fd, FALSE, cancellable, error))
     return FALSE;
+  /* Note we must delete the origin and backing directories first, as the "source of truth"
+   * is the deployment path.  We don't currently have code that detects "orphaned"
+   * origin files or work directories.
+   */
+  if (!glnx_shutil_rm_rf_at (self->sysroot_fd, backing_relpath, cancellable, error))
+    return FALSE;
   if (!glnx_shutil_rm_rf_at (self->sysroot_fd, origin_relpath, cancellable, error))
     return FALSE;
   if (!glnx_shutil_rm_rf_at (self->sysroot_fd, deployment_path, cancellable, error))
diff --git a/src/libostree/ostree-sysroot-deploy.c b/src/libostree/ostree-sysroot-deploy.c
index a809d560a7..ebc0b4fcb2 100644
--- a/src/libostree/ostree-sysroot-deploy.c
+++ b/src/libostree/ostree-sysroot-deploy.c
@@ -3078,6 +3078,10 @@ sysroot_initialize_deployment (OstreeSysroot *self, const char *osname, const ch
   if (!require_stateroot (self, osname, error))
     return FALSE;
 
+  g_autofree char *stateroot_backing = g_strdup_printf ("ostree/deploy/%s/backing", osname);
+  if (!glnx_shutil_mkdir_p_at (self->sysroot_fd, stateroot_backing, 0700, cancellable, error))
+    return glnx_prefix_error (error, "Creating backing directory");
+
   OstreeRepo *repo = ostree_sysroot_repo (self);
 
   gint new_deployserial;
@@ -3295,6 +3299,49 @@ sysroot_finalize_selinux_policy (int deployment_dfd, GError **error)
 }
 #endif /* HAVE_SELINUX */
 
+static gboolean
+sysroot_initialize_deployment_backing (OstreeSysroot *self, OstreeDeployment *deployment,
+                                       OstreeSePolicy *sepolicy, GError **error)
+{
+  GLNX_AUTO_PREFIX_ERROR ("Preparing deployment backing dir", error);
+  g_autofree char *deployment_path = ostree_sysroot_get_deployment_dirpath (self, deployment);
+  g_autofree char *backing_relpath = _ostree_sysroot_get_deployment_backing_relpath (deployment);
+  struct stat stbuf;
+
+  if (!glnx_fstatat (self->sysroot_fd, deployment_path, &stbuf, AT_SYMLINK_NOFOLLOW, error))
+    return FALSE;
+
+  // Create the "backing" directory, which holds additional per-deployment data
+  if (!glnx_ensure_dir (self->sysroot_fd, backing_relpath, 0700, error))
+    return glnx_prefix_error (error, "Creating backing dir");
+
+  // The root-transient holds overlayfs directories for the root
+  g_autofree char *rootovldir
+      = g_build_filename (backing_relpath, OSTREE_DEPLOYMENT_ROOT_TRANSIENT_DIR, NULL);
+  if (!glnx_ensure_dir (self->sysroot_fd, rootovldir, 0700, error))
+    return glnx_prefix_error (error, "Creating root ovldir");
+
+  // The overlayfs work (subdirectory of root-transient)
+  g_autofree char *workdir = g_build_filename (rootovldir, "work", NULL);
+  if (!glnx_ensure_dir (self->sysroot_fd, workdir, 0700, error))
+    return glnx_prefix_error (error, "Creating work dir");
+
+  // Create the overlayfs upper; this needs to have the same mode and SELinux label as the root
+  {
+    g_auto (OstreeSepolicyFsCreatecon) con = {
+      0,
+    };
+
+    if (!_ostree_sepolicy_preparefscreatecon (&con, sepolicy, "/", stbuf.st_mode, error))
+      return glnx_prefix_error (error, "Looking up SELinux label for /");
+    g_autofree char *upperdir = g_build_filename (rootovldir, "upper", NULL);
+    if (!glnx_ensure_dir (self->sysroot_fd, upperdir, stbuf.st_mode, error))
+      return glnx_prefix_error (error, "Creating upper dir");
+  }
+
+  return TRUE;
+}
+
 static gboolean
 sysroot_finalize_deployment (OstreeSysroot *self, OstreeDeployment *deployment,
                              OstreeDeployment *merge_deployment, GCancellable *cancellable,
@@ -3360,6 +3407,9 @@ sysroot_finalize_deployment (OstreeSysroot *self, OstreeDeployment *deployment,
   if (!selinux_relabel_var_if_needed (self, sepolicy, os_deploy_dfd, cancellable, error))
     return FALSE;
 
+  if (!sysroot_initialize_deployment_backing (self, deployment, sepolicy, error))
+    return FALSE;
+
   /* Rewrite the origin using the final merged selinux config, just to be
    * conservative about getting the right labels.
    */
diff --git a/src/libostree/ostree-sysroot-private.h b/src/libostree/ostree-sysroot-private.h
index 7373b19b63..5cbeae0ecb 100644
--- a/src/libostree/ostree-sysroot-private.h
+++ b/src/libostree/ostree-sysroot-private.h
@@ -142,6 +142,8 @@ gboolean _ostree_sysroot_boot_complete (OstreeSysroot *self, GCancellable *cance
 
 OstreeDeployment *_ostree_sysroot_deserialize_deployment_from_variant (GVariant *v, GError **error);
 
+char *_ostree_sysroot_get_deployment_backing_relpath (OstreeDeployment *deployment);
+
 gboolean _ostree_sysroot_rmrf_deployment (OstreeSysroot *sysroot, OstreeDeployment *deployment,
                                           GCancellable *cancellable, GError **error);
 
diff --git a/src/libostree/ostree-sysroot.c b/src/libostree/ostree-sysroot.c
index 62adc6221c..5bcae1c79a 100644
--- a/src/libostree/ostree-sysroot.c
+++ b/src/libostree/ostree-sysroot.c
@@ -36,7 +36,7 @@
 #include "ostree-repo-private.h"
 #include "ostree-sepolicy-private.h"
 #include "ostree-sysroot-private.h"
-#include "ostree.h"
+#include "otcore.h"
 
 /**
  * SECTION:ostree-sysroot
@@ -1965,6 +1965,17 @@ ostree_sysroot_simple_write_deployment (OstreeSysroot *sysroot, const char *osna
   return TRUE;
 }
 
+/* Return the sysroot-relative path to the "backing" directory of a deployment
+ * which can hold additional data.
+ */
+char *
+_ostree_sysroot_get_deployment_backing_relpath (OstreeDeployment *deployment)
+{
+  return g_strdup_printf (
+      "ostree/deploy/%s/backing/%s.%d", ostree_deployment_get_osname (deployment),
+      ostree_deployment_get_csum (deployment), ostree_deployment_get_deployserial (deployment));
+}
+
 /* Deploy a copy of @target_deployment */
 static gboolean
 clone_deployment (OstreeSysroot *sysroot, OstreeDeployment *target_deployment,
diff --git a/src/libotcore/otcore.h b/src/libotcore/otcore.h
index 1593e7b77f..4d51398e6a 100644
--- a/src/libotcore/otcore.h
+++ b/src/libotcore/otcore.h
@@ -55,6 +55,11 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error);
 // we make it with mode 0 (which requires CAP_DAC_OVERRIDE to pass through).
 #define OTCORE_RUN_OSTREE_PRIVATE "/run/ostree/.private"
 
+// The directory holding extra/backing data for a deployment, such as overlayfs workdirs
+#define OSTREE_DEPLOYMENT_BACKING_DIR "backing"
+// The directory holding the root overlayfs
+#define OSTREE_DEPLOYMENT_ROOT_TRANSIENT_DIR "root-transient"
+
 // The name of the composefs metadata root
 #define OSTREE_COMPOSEFS_NAME ".ostree.cfs"
 // The temporary directory used for the EROFS mount; it's in the .private directory
@@ -70,6 +75,8 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error);
 // This key if present contains the public key successfully used
 // to verify the signature.
 #define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed"
+// This key is always written; its boolean value records whether the root is transient
+#define OTCORE_RUN_BOOTED_KEY_ROOT_TRANSIENT "root.transient"
 // This key will be present if the sysroot-ro flag was found
 #define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro"
 
diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c
index 41fc8f98c3..8d0ab88e44 100644
--- a/src/switchroot/ostree-prepare-root.c
+++ b/src/switchroot/ostree-prepare-root.c
@@ -87,6 +87,8 @@
 #define SYSROOT_KEY "sysroot"
 #define READONLY_KEY "readonly"
 
+/* This key configures the / mount in the deployment root */
+#define ROOT_KEY "root"
 #define ETC_KEY "etc"
 #define TRANSIENT_KEY "transient"
 
@@ -352,6 +354,11 @@ main (int argc, char *argv[])
     errx (EXIT_FAILURE, "Failed to parse config: %s", error->message);
 
   gboolean sysroot_readonly = FALSE;
+  gboolean root_transient = FALSE;
+
+  if (!ot_keyfile_get_boolean_with_default (config, ROOT_KEY, TRANSIENT_KEY, FALSE, &root_transient,
+                                            &error))
+    errx (EXIT_FAILURE, "Failed to parse root.transient value: %s", error->message);
 
   // We always parse the composefs config, because we want to detect and error
   // out if it's enabled, but not supported at compile time.
@@ -375,6 +382,11 @@ main (int argc, char *argv[])
   if (root_mountpoint == NULL)
     err (EXIT_FAILURE, "realpath(\"%s\")", root_arg);
   g_autofree char *deploy_path = resolve_deploy_path (root_mountpoint);
+  const char *deploy_directory_name = glnx_basename (deploy_path);
+  // Note that realpath() should have stripped any trailing `/` which shouldn't
+  // be in the karg to start with, but we assert here to be sure we have a non-empty
+  // filename.
+  g_assert (deploy_directory_name && *deploy_directory_name);
 
   if (mkdirat (AT_FDCWD, OTCORE_RUN_OSTREE, 0755) < 0)
     err (EXIT_FAILURE, "Failed to create %s", OTCORE_RUN_OSTREE);
@@ -510,13 +522,36 @@ main (int argc, char *argv[])
     errx (EXIT_FAILURE, "composefs: enabled at runtime, but support is not compiled in");
 #endif
 
-  if (!using_composefs)
+  if (root_transient)
+    {
+      /* if (using_composefs)
+       * TODO: Add support to libcomposefs to mount writably; for now we end up with two overlayfs
+       * which is a bit silly.
+       */
+
+      g_autofree char *backingdir = g_strdup_printf ("../../backing/%s", deploy_directory_name);
+      g_autofree char *workdir
+          = g_build_filename (backingdir, OSTREE_DEPLOYMENT_ROOT_TRANSIENT_DIR, "work", NULL);
+      g_autofree char *upperdir
+          = g_build_filename (backingdir, OSTREE_DEPLOYMENT_ROOT_TRANSIENT_DIR, "upper", NULL);
+      g_autofree char *ovl_options
+          = g_strdup_printf ("lowerdir=.,upperdir=%s,workdir=%s", upperdir, workdir);
+      if (mount ("overlay", TMP_SYSROOT, "overlay", MS_SILENT, ovl_options) < 0)
+        err (EXIT_FAILURE, "failed to mount transient root overlayfs");
+      g_print ("Enabled transient /\n");
+    }
+  else if (!using_composefs)
     {
+      g_print ("Using legacy ostree bind mount for /\n");
       /* The deploy root starts out bind mounted to sysroot.tmp */
       if (mount (deploy_path, TMP_SYSROOT, NULL, MS_BIND | MS_SILENT, NULL) < 0)
         err (EXIT_FAILURE, "failed to make initial bind mount %s", deploy_path);
     }
 
+  /* Pass on the state */
+  g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_ROOT_TRANSIENT,
+                         g_variant_new_boolean (root_transient));
+
   /* This will result in a system with /sysroot read-only. Thus, two additional
    * writable bind-mounts (for /etc and /var) are required later on. */
   if (sysroot_readonly)
@@ -548,7 +583,7 @@ main (int argc, char *argv[])
   /* Prepare /etc.
    * No action required if sysroot is writable. Otherwise, a bind-mount for
    * the deployment needs to be created and remounted as read/write. */
-  if (sysroot_readonly || using_composefs)
+  if (sysroot_readonly || using_composefs || root_transient)
     {
       gboolean etc_transient = FALSE;
       if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE,