Skip to content

Commit

Permalink
Support transient /etc
Browse files Browse the repository at this point in the history
If the `prepare-root.conf` file contains:
```
[etc]
transient=yes
```

Then during prepare-root, an overlayfs is mounted as /etc, with the upper
dir being in /run. If composefs is used, the lower dir is `usr/etc` from
the composefs image (which is relabeled to work as /etc), or it is the
deployed `$deploydir/usr/etc`.

Note that for this to work with selinux, the commit must have been
built with OSTREE_REPO_COMMIT_MODIFIER_FLAGS_USRETC_AS_ETC. Otherwise
the lower will have the wrong selinux contexts for the final location.

We also set the transient-etc key in the ostree-booted file, pointing it
to the upper directory that is used.

There are some additional complexities here:

 * Semi-recent versions of selinux-policy have issues with the overlayfs
   mount being kernel_t, and that is not allowed to manage files. This
   should be mostly fixed in selinux-policy-38.21 but some further
   details need to be ironed out.

 * Any /etc files created in the initramfs will not be labeled,
   because the selinux policy has not been loaded. In addition, the
   upper dir is on a tmpfs, and any manually set xattr-based selinux
   labels on those are reset during policy load. To work around
   this we hook into ostree-remount and relabel all files on /etc
   that are from the overlayfs upper dir.

 * During the initramfs, Systemd mounts /run/machine-id on top of
   /etc/machine-id, and if this mount exists during later boot, then
   systemd-machine-id-commit.service will remove it and update the
   real file with its content once etc is writable. To ensure that
   this keeps working, we need to re-add this bind mount in the
   remounted /etc if /run/machine-id exists.

 * ostree-remount no longer needs to remount /etc read-only in the
   transient-etc case.

Signed-off-by: Alexander Larsson <[email protected]>
  • Loading branch information
alexlarsson committed Oct 5, 2023
1 parent 163d6fb commit 29facec
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 8 deletions.
5 changes: 5 additions & 0 deletions Makefile-switchroot.am
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ ostree_remount_SOURCES = \
ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -I$(srcdir)/libglnx
ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libotcore.la libotutil.la libglnx.la

if USE_SELINUX
ostree_remount_CPPFLAGS += $(OT_DEP_SELINUX_CFLAGS)
ostree_remount_LDADD += $(OT_DEP_SELINUX_LIBS)
endif

if USE_COMPOSEFS
ostree_prepare_root_LDADD += libcomposefs.la
endif
Expand Down
4 changes: 4 additions & 0 deletions man/ostree-prepare-root.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ License along with this library. If not, see <https://www.gnu.org/licenses/>.
<term><varname>sysroot.readonly</varname></term>
<listitem><para>A boolean value; the default is <literal>false</literal>. If this is set to <literal>true</literal>, then the <literal>/sysroot</literal> mount point is mounted read-only.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>etc.transient</varname></term>
<listitem><para>A boolean value; the default is <literal>false</literal>. If this is set to <literal>true</literal>, then the <literal>/etc</literal> mount point is mounted transiently, i.e. backed by a non-persistent location.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>composefs.enabled</varname></term>
<listitem><para>This can be <literal>yes</literal>, <literal>no</literal>, <literal>maybe</literal> or
Expand Down
2 changes: 2 additions & 0 deletions src/libotcore/otcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,5 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error);
#define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed"
// This key will be present if the sysroot-ro flag was found
#define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro"

#define OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC "transient-etc"
55 changes: 48 additions & 7 deletions src/switchroot/ostree-prepare-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@
#define SYSROOT_KEY "sysroot"
#define READONLY_KEY "readonly"

#define ETC_KEY "etc"
#define TRANSIENT_KEY "transient"

#define COMPOSEFS_KEY "composefs"
#define ENABLED_KEY "enabled"
#define KEYPATH_KEY "keypath"
Expand Down Expand Up @@ -547,13 +550,51 @@ main (int argc, char *argv[])
* the deployment needs to be created and remounted as read/write. */
if (sysroot_readonly || using_composefs)
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT,
NULL)
< 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
gboolean etc_transient = FALSE;
if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE,
&etc_transient, &error))
errx (EXIT_FAILURE, "Failed to parse etc.transient value: %s", error->message);

if (etc_transient)
{
char *ovldir = "/run/ostree/transient-etc";

g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC,
g_variant_new_string (ovldir));

char *lowerdir = "usr/etc";
if (using_composefs)
lowerdir = TMP_SYSROOT "/usr/etc";

g_autofree char *upperdir = g_build_filename (ovldir, "upper", NULL);
g_autofree char *workdir = g_build_filename (ovldir, "work", NULL);

struct
{
const char *path;
int mode;
} subdirs[] = { { ovldir, 0700 }, { upperdir, 0755 }, { workdir, 0755 } };
for (int i = 0; i < G_N_ELEMENTS (subdirs); i++)
{
if (mkdirat (AT_FDCWD, subdirs[i].path, subdirs[i].mode) < 0)
err (EXIT_FAILURE, "Failed to create dir %s", subdirs[i].path);
}

g_autofree char *ovl_options
= g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir);
if (mount ("overlay", TMP_SYSROOT "/etc", "overlay", MS_SILENT, ovl_options) < 0)
err (EXIT_FAILURE, "failed to mount transient etc overlayfs");
}
else
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT,
NULL)
< 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
}
}

/* Prepare /usr.
Expand Down
85 changes: 84 additions & 1 deletion src/switchroot/ostree-remount.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <unistd.h>
#ifdef HAVE_SELINUX
#include <selinux/restorecon.h>
#endif

#include "ostree-mount-util.h"
#include "otcore.h"
Expand Down Expand Up @@ -76,6 +79,43 @@ do_remount (const char *target, bool writable)
printf ("Remounted %s: %s\n", writable ? "rw" : "ro", target);
}

/* Relabel REAL_PATH with selinux_restorecon() and, when IS_DIR is true,
 * enumerate the directory UPPER_PATH and recurse into every entry found
 * there, pairing each entry name under UPPER_PATH with the same name under
 * REAL_PATH.
 *
 * upper_path: directory that is enumerated to decide which names to visit
 * real_path:  path that is handed to selinux_restorecon()
 * is_dir:     whether UPPER_PATH should be opened and walked
 *
 * All failures are reported on stderr and are non-fatal. When built without
 * HAVE_SELINUX this function does nothing.
 */
static void
relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean is_dir)
{
#ifdef HAVE_SELINUX
  if (selinux_restorecon (real_path, 0))
    g_printerr ("Failed to relabel %s\n", real_path);

  if (!is_dir)
    return;

  g_auto (GLnxDirFdIterator) dfd_iter = {
    0,
  };

  if (!glnx_dirfd_iterator_init_at (AT_FDCWD, upper_path, FALSE, &dfd_iter, NULL))
    {
      g_printerr ("Failed to open directory %s\n", upper_path);
      /* The iterator was never initialized; iterating it would only yield a
       * second, misleading "Failed to read directory" error. */
      return;
    }

  while (TRUE)
    {
      struct dirent *dent;

      if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, NULL, NULL))
        {
          g_printerr ("Failed to read directory %s\n", upper_path);
          break;
        }

      /* A NULL entry marks the end of the directory. */
      if (dent == NULL)
        break;

      g_autofree char *upper_child = g_build_filename (upper_path, dent->d_name, NULL);
      g_autofree char *real_child = g_build_filename (real_path, dent->d_name, NULL);
      relabel_dir_for_upper (upper_child, real_child, dent->d_type == DT_DIR);
    }
#endif
}

int
main (int argc, char *argv[])
{
Expand Down Expand Up @@ -119,6 +159,49 @@ main (int argc, char *argv[])
if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
perror ("warning: While remounting /sysroot MS_PRIVATE");

const char *transient_etc = NULL;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s",
&transient_etc);

if (transient_etc)
{
/* Systemd will create a /run/machine-id -> /etc/machine-id bind mount if /etc is
* read-only, and then it will later replace this mount (if it exists) with a real one.
* We need to relabel the file on the overlayfs, below the bind mount, so we unmount
* the covering mount. However, we do so in a temporary private namespace to avoid
* affecting other parts of the system.
*/

glnx_autofd int initial_ns_fd = -1;
if (g_file_test ("/run/machine-id", G_FILE_TEST_EXISTS))
{
initial_ns_fd = open ("/proc/self/ns/mnt", O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (initial_ns_fd < 0)
perror ("Failed to open initial namespace");

if (unshare (CLONE_NEWNS) < 0)
perror ("Failed to unshare initial namespace");

/* Ensure unmount is not propagated */
if (mount ("none", "/etc", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
perror ("warning: While remounting /etc MS_PRIVATE");

if (umount2 ("/etc/machine-id", MNT_DETACH) < 0)
perror ("Failed to unmount machine-id");
}

/* If the initramfs created any files in /etc (directly or via overlay copy-up)
* they will be unlabeled, because the selinux policy is not loaded until after
* the pivot-root. So, for all files in the upper dir, relabel the corresponding
* overlay file.
*/
g_autofree char *upper = g_build_filename (transient_etc, "upper", NULL);
relabel_dir_for_upper (upper, "/etc", TRUE);

if (initial_ns_fd != -1 && setns (initial_ns_fd, CLONE_NEWNS) < 0)
perror ("Failed to join initial namespace");
}

gboolean root_is_composefs = FALSE;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_COMPOSEFS, "b",
&root_is_composefs);
Expand All @@ -141,7 +224,7 @@ main (int argc, char *argv[])

/* And also make sure to make /etc rw again. We make this conditional on
* sysroot_configured_readonly because only in that case is it a bind-mount. */
if (sysroot_configured_readonly)
if (sysroot_configured_readonly && !transient_etc)
do_remount ("/etc", true);

/* If /var was created as an OSTree default bind mount (instead of being a separate
Expand Down

0 comments on commit 29facec

Please sign in to comment.