From fb575ba34082deb12b5ff1907a44ed6b4f8afa29 Mon Sep 17 00:00:00 2001 From: Alexander Larsson Date: Fri, 29 Sep 2023 13:37:22 +0200 Subject: [PATCH] Support transient /etc If the `prepare-root.conf` file contains: ``` [etc] transient=yes ``` Then during prepare-root, an overlayfs is mounted as /etc, with the upper dir being in /run. If composefs is used, the lower dir is `usr/etc` from the composefs image (which is relabeled to work as /etc), or it is the deployed `$deploydir/usr/etc`. Note that for this to work with selinux, the commit must have been built with OSTREE_REPO_COMMIT_MODIFIER_FLAGS_USRETC_AS_ETC. Otherwise the lower will have the wrong selinux contexts for the final location. We also set the transient-etc key in the ostree-booted file, pointing it to the upper directory that is used. There are some additional complexities here: * Any system using selinux and using transient etc must enable the new ostree selinux module. Otherwise the overlayfs filesystem will not have enough permissions to access the expected files in etc. * Any /etc files created in the initramfs will not be labeled, because the selinux policy has not been loaded. In addition, the upper dir is on a tmpfs, and any manually set xattr-based selinux labels on those are reset during policy load. To work around this we hook into ostree-remount and relabel all files on /etc that are from the overlayfs upper dir. * During the initramfs, Systemd mounts /run/machine-id on top of /etc/machine-id, and if this mount exists during boot, then systemd-machine-id-commit.service will remove it and update the real file with its content once etc is writable. This conflicts with the relabeling above as we will relabel the bind mount. To handle this we do the relabeling in a private mount namespace where the machine-id file has been unmounted. * ostree-remount no longer needs to remount /etc read-write in the transient-etc case. 
Signed-off-by: Alexander Larsson --- Makefile-switchroot.am | 5 ++ man/ostree-prepare-root.xml | 4 ++ src/libotcore/otcore.h | 2 + src/switchroot/ostree-prepare-root.c | 55 +++++++++++++++--- src/switchroot/ostree-remount.c | 85 +++++++++++++++++++++++++++- 5 files changed, 143 insertions(+), 8 deletions(-) diff --git a/Makefile-switchroot.am b/Makefile-switchroot.am index 71a3cbda57..1e458e0e2c 100644 --- a/Makefile-switchroot.am +++ b/Makefile-switchroot.am @@ -63,6 +63,11 @@ ostree_remount_SOURCES = \ ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -I$(srcdir)/libglnx ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libotcore.la libotutil.la libglnx.la +if USE_SELINUX +ostree_remount_CPPFLAGS += $(OT_DEP_SELINUX_CFLAGS) +ostree_remount_LDADD += $(OT_DEP_SELINUX_LIBS) +endif + if USE_COMPOSEFS ostree_prepare_root_LDADD += libcomposefs.la endif diff --git a/man/ostree-prepare-root.xml b/man/ostree-prepare-root.xml index 820e6a278e..03bf022e27 100644 --- a/man/ostree-prepare-root.xml +++ b/man/ostree-prepare-root.xml @@ -113,6 +113,10 @@ License along with this library. If not, see . sysroot.readonly A boolean value; the default is false. If this is set to true, then the /sysroot mount point is mounted read-only. + + etc.transient + A boolean value; the default is false. If this is set to true, then the /etc mount point is mounted transiently i.e. a non-persistent location. + composefs.enabled This can be yes, no. 
maybe or diff --git a/src/libotcore/otcore.h b/src/libotcore/otcore.h index ba162b8d14..1593e7b77f 100644 --- a/src/libotcore/otcore.h +++ b/src/libotcore/otcore.h @@ -72,3 +72,5 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error); #define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed" // This key will be present if the sysroot-ro flag was found #define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro" + +#define OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC "transient-etc" diff --git a/src/switchroot/ostree-prepare-root.c b/src/switchroot/ostree-prepare-root.c index ca4ebb9914..27d06fa7f8 100644 --- a/src/switchroot/ostree-prepare-root.c +++ b/src/switchroot/ostree-prepare-root.c @@ -87,6 +87,9 @@ #define SYSROOT_KEY "sysroot" #define READONLY_KEY "readonly" +#define ETC_KEY "etc" +#define TRANSIENT_KEY "transient" + #define COMPOSEFS_KEY "composefs" #define ENABLED_KEY "enabled" #define KEYPATH_KEY "keypath" @@ -547,13 +550,51 @@ main (int argc, char *argv[]) * the deployment needs to be created and remounted as read/write. */ if (sysroot_readonly || using_composefs) { - /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ - if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) - err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); - if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, - NULL) - < 0) - err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); + gboolean etc_transient = FALSE; + if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE, + &etc_transient, &error)) + errx (EXIT_FAILURE, "Failed to parse etc.transient value: %s", error->message); + + if (etc_transient) + { + char *ovldir = "/run/ostree/transient-etc"; + + g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, + g_variant_new_string (ovldir)); + + char *lowerdir = "usr/etc"; + if (using_composefs) + lowerdir = TMP_SYSROOT "/usr/etc"; + + g_autofree char *upperdir = g_build_filename (ovldir, "upper", NULL); + g_autofree char *workdir = g_build_filename (ovldir, "work", NULL); + + struct + { + const char *path; + int mode; + } subdirs[] = { { ovldir, 0700 }, { upperdir, 0755 }, { workdir, 0755 } }; + for (int i = 0; i < G_N_ELEMENTS (subdirs); i++) + { + if (mkdirat (AT_FDCWD, subdirs[i].path, subdirs[i].mode) < 0) + err (EXIT_FAILURE, "Failed to create dir %s", subdirs[i].path); + } + + g_autofree char *ovl_options + = g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir); + if (mount ("overlay", TMP_SYSROOT "/etc", "overlay", MS_SILENT, ovl_options) < 0) + err (EXIT_FAILURE, "failed to mount transient etc overlayfs"); + } + else + { + /* Bind-mount /etc (at deploy path), and remount as writable. 
*/ + if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0) + err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc"); + if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT, + NULL) + < 0) + err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc"); + } } /* Prepare /usr. diff --git a/src/switchroot/ostree-remount.c b/src/switchroot/ostree-remount.c index d8b01f6858..795e1ef3a2 100644 --- a/src/switchroot/ostree-remount.c +++ b/src/switchroot/ostree-remount.c @@ -35,6 +35,9 @@ #include #include #include +#ifdef HAVE_SELINUX +#include +#endif #include "ostree-mount-util.h" #include "otcore.h" @@ -76,6 +79,43 @@ do_remount (const char *target, bool writable) printf ("Remounted %s: %s\n", writable ? "rw" : "ro", target); } +static void +relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean is_dir) +{ +#ifdef HAVE_SELINUX + if (selinux_restorecon (real_path, 0)) + g_printerr ("Failed to relabel %s\n", real_path); + + if (!is_dir) + return; + + g_auto (GLnxDirFdIterator) dfd_iter = { + 0, + }; + + if (!glnx_dirfd_iterator_init_at (AT_FDCWD, upper_path, FALSE, &dfd_iter, NULL)) + g_printerr ("Failed to open directory %s\n", upper_path); + + while (TRUE) + { + struct dirent *dent; + + if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, NULL, NULL)) + { + g_printerr ("Failed to read directory %s\n", upper_path); + break; + } + + if (dent == NULL) + break; + + g_autofree char *upper_child = g_build_filename (upper_path, dent->d_name, NULL); + g_autofree char *real_child = g_build_filename (real_path, dent->d_name, NULL); + relabel_dir_for_upper (upper_child, real_child, dent->d_type == DT_DIR); + } +#endif +} + int main (int argc, char *argv[]) { @@ -119,6 +159,49 @@ main (int argc, char *argv[]) if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0) perror ("warning: While remounting /sysroot MS_PRIVATE"); + const char 
*transient_etc = NULL; + g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s", + &transient_etc); + + if (transient_etc) + { + /* Systemd will create a /run/machine-id -> /etc/machine-id bind mount if /etc is + * read-only, and then it will later replace this mount (if it exist) with a real one. + * We need to relabel the file on the overlayfs, below the bind mount, so we unmount + * the covering mount. However, we do so in a temporary private namespace to avoid + * affecting other parts of the system. + */ + + glnx_autofd int initial_ns_fd = -1; + if (g_file_test ("/run/machine-id", G_FILE_TEST_EXISTS)) + { + initial_ns_fd = open ("/proc/self/ns/mnt", O_RDONLY | O_NOCTTY | O_CLOEXEC); + if (initial_ns_fd < 0) + perror ("Failed to open initial namespace"); + + if (unshare (CLONE_NEWNS) < 0) + perror ("Failed to unshare initial namespace"); + + /* Ensure unmount is not propagated */ + if (mount ("none", "/etc", NULL, MS_REC | MS_PRIVATE, NULL) < 0) + perror ("warning: While remounting /etc MS_PRIVATE"); + + if (umount2 ("/etc/machine-id", MNT_DETACH) < 0) + perror ("Failed to unmount machine-id"); + } + + /* If the initramfs created any files in /etc (directly or via overlay copy-up) + * they will be unlabeled, because the selinux policy is not loaded until after + * the pivot-root. So, for all files in the upper dir, relabel the corresponding + * overlay file. + */ + g_autofree char *upper = g_build_filename (transient_etc, "upper", NULL); + relabel_dir_for_upper (upper, "/etc", TRUE); + + if (initial_ns_fd != -1 && setns (initial_ns_fd, CLONE_NEWNS) < 0) + perror ("Failed to join initial namespace"); + } + gboolean root_is_composefs = FALSE; g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_COMPOSEFS, "b", &root_is_composefs); @@ -141,7 +224,7 @@ main (int argc, char *argv[]) /* And also make sure to make /etc rw again. 
We make this conditional on * sysroot_configured_readonly because only in that case is it a bind-mount. */ - if (sysroot_configured_readonly) + if (sysroot_configured_readonly && !transient_etc) do_remount ("/etc", true); /* If /var was created as as an OSTree default bind mount (instead of being a separate