Skip to content

Commit

Permalink
Support transient /etc
Browse files Browse the repository at this point in the history
If the `prepare-root.conf` file contains:
```
[etc]
transient=yes
```

Then during prepare-root, an overlayfs is mounted as /etc, with the upper
dir being in /run. If composefs is used, the lower dir is `usr/etc` from
the composefs image (which is relabeled to work as /etc), or it is the
deployed `$deploydir/usr/etc`.

Note that for this to work with selinux, the commit must have been
built with OSTREE_REPO_COMMIT_MODIFIER_FLAGS_USRETC_AS_ETC. Otherwise
the lower will have the wrong selinux contexts for the final location.

We also set the transient-etc key in the ostree-booted file, pointing it
to the upper directory that is used.

There are some additional complexities here:

 * Any system using selinux and using transient etc must enable the
   new ostree selinux module. Otherwise the overlayfs filesystem will
   not have enough permissions to access the expected files in etc.

 * Any /etc files created in the initramfs will not be labeled,
   because the selinux policy has not been loaded. In addition, the
   upper dir is on a tmpfs, and any manually set xattr-based selinux
   labels on those are reset during policy load. To work around
   this we hook into ostree-remount and relabel all files on /etc
   that are from the overlayfs upper dir.

 * During the initramfs, systemd mounts /run/machine-id on top
   of /etc/machine-id, and if this mount exists during boot, then
   systemd-machine-id-commit.service will remove it and update
   the real file with its content once etc is writable. This
   conflicts with the relabeling above as we will relabel the
   bind mount. To handle this we do the relabeling in a private
   mount namespace where the machine-id file has been unmounted.

 * ostree-remount no longer needs to remount /etc read-only in the
   transient-etc case.

Signed-off-by: Alexander Larsson <[email protected]>
  • Loading branch information
alexlarsson committed Oct 2, 2023
1 parent 4b65d2d commit fb575ba
Show file tree
Hide file tree
Showing 5 changed files with 143 additions and 8 deletions.
5 changes: 5 additions & 0 deletions Makefile-switchroot.am
Original file line number Diff line number Diff line change
Expand Up @@ -63,6 +63,11 @@ ostree_remount_SOURCES = \
ostree_remount_CPPFLAGS = $(AM_CPPFLAGS) $(OT_INTERNAL_GIO_UNIX_CFLAGS) -Isrc/switchroot -I$(srcdir)/src/libotcore -I$(srcdir)/src/libotutil -I$(srcdir)/libglnx
ostree_remount_LDADD = $(AM_LDFLAGS) $(OT_INTERNAL_GIO_UNIX_LIBS) libotcore.la libotutil.la libglnx.la

if USE_SELINUX
ostree_remount_CPPFLAGS += $(OT_DEP_SELINUX_CFLAGS)
ostree_remount_LDADD += $(OT_DEP_SELINUX_LIBS)
endif

if USE_COMPOSEFS
ostree_prepare_root_LDADD += libcomposefs.la
endif
Expand Down
4 changes: 4 additions & 0 deletions man/ostree-prepare-root.xml
Original file line number Diff line number Diff line change
Expand Up @@ -113,6 +113,10 @@ License along with this library. If not, see <https://www.gnu.org/licenses/>.
<term><varname>sysroot.readonly</varname></term>
<listitem><para>A boolean value; the default is <literal>false</literal>. If this is set to <literal>true</literal>, then the <literal>/sysroot</literal> mount point is mounted read-only.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>etc.transient</varname></term>
<listitem><para>A boolean value; the default is <literal>false</literal>. If this is set to <literal>true</literal>, then the <literal>/etc</literal> mount point is mounted transiently, i.e. at a non-persistent location.</para></listitem>
</varlistentry>
<varlistentry>
<term><varname>composefs.enabled</varname></term>
<listitem><para>This can be <literal>yes</literal>, <literal>no</literal>, <literal>maybe</literal> or
Expand Down
2 changes: 2 additions & 0 deletions src/libotcore/otcore.h
Original file line number Diff line number Diff line change
Expand Up @@ -72,3 +72,5 @@ GKeyFile *otcore_load_config (int rootfs, const char *filename, GError **error);
#define OTCORE_RUN_BOOTED_KEY_COMPOSEFS_SIGNATURE "composefs.signed"
// This key will be present if the sysroot-ro flag was found
#define OTCORE_RUN_BOOTED_KEY_SYSROOT_RO "sysroot-ro"

#define OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC "transient-etc"
55 changes: 48 additions & 7 deletions src/switchroot/ostree-prepare-root.c
Original file line number Diff line number Diff line change
Expand Up @@ -87,6 +87,9 @@
#define SYSROOT_KEY "sysroot"
#define READONLY_KEY "readonly"

#define ETC_KEY "etc"
#define TRANSIENT_KEY "transient"

#define COMPOSEFS_KEY "composefs"
#define ENABLED_KEY "enabled"
#define KEYPATH_KEY "keypath"
Expand Down Expand Up @@ -547,13 +550,51 @@ main (int argc, char *argv[])
* the deployment needs to be created and remounted as read/write. */
if (sysroot_readonly || using_composefs)
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT,
NULL)
< 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
gboolean etc_transient = FALSE;
if (!ot_keyfile_get_boolean_with_default (config, ETC_KEY, TRANSIENT_KEY, FALSE,
&etc_transient, &error))
errx (EXIT_FAILURE, "Failed to parse etc.transient value: %s", error->message);

if (etc_transient)
{
char *ovldir = "/run/ostree/transient-etc";

g_variant_builder_add (&metadata_builder, "{sv}", OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC,
g_variant_new_string (ovldir));

char *lowerdir = "usr/etc";
if (using_composefs)
lowerdir = TMP_SYSROOT "/usr/etc";

g_autofree char *upperdir = g_build_filename (ovldir, "upper", NULL);
g_autofree char *workdir = g_build_filename (ovldir, "work", NULL);

struct
{
const char *path;
int mode;
} subdirs[] = { { ovldir, 0700 }, { upperdir, 0755 }, { workdir, 0755 } };
for (int i = 0; i < G_N_ELEMENTS (subdirs); i++)
{
if (mkdirat (AT_FDCWD, subdirs[i].path, subdirs[i].mode) < 0)
err (EXIT_FAILURE, "Failed to create dir %s", subdirs[i].path);
}

g_autofree char *ovl_options
= g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", lowerdir, upperdir, workdir);
if (mount ("overlay", TMP_SYSROOT "/etc", "overlay", MS_SILENT, ovl_options) < 0)
err (EXIT_FAILURE, "failed to mount transient etc overlayfs");
}
else
{
/* Bind-mount /etc (at deploy path), and remount as writable. */
if (mount ("etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_SILENT, NULL) < 0)
err (EXIT_FAILURE, "failed to prepare /etc bind-mount at /sysroot.tmp/etc");
if (mount (TMP_SYSROOT "/etc", TMP_SYSROOT "/etc", NULL, MS_BIND | MS_REMOUNT | MS_SILENT,
NULL)
< 0)
err (EXIT_FAILURE, "failed to make writable /etc bind-mount at /sysroot.tmp/etc");
}
}

/* Prepare /usr.
Expand Down
85 changes: 84 additions & 1 deletion src/switchroot/ostree-remount.c
Original file line number Diff line number Diff line change
Expand Up @@ -35,6 +35,9 @@
#include <sys/stat.h>
#include <sys/statvfs.h>
#include <unistd.h>
#ifdef HAVE_SELINUX
#include <selinux/restorecon.h>
#endif

#include "ostree-mount-util.h"
#include "otcore.h"
Expand Down Expand Up @@ -76,6 +79,43 @@ do_remount (const char *target, bool writable)
printf ("Remounted %s: %s\n", writable ? "rw" : "ro", target);
}

/* Relabel REAL_PATH with selinux_restorecon() and, when IS_DIR is true,
 * recurse over the entries found in UPPER_PATH, relabeling the entry of the
 * same name below REAL_PATH for each of them.
 *
 * upper_path: directory that is enumerated to decide which entries to visit.
 * real_path:  path that is actually passed to selinux_restorecon().
 * is_dir:     whether to descend into upper_path after relabeling real_path.
 *
 * Everything here is best effort: failures are reported on stderr and the
 * walk carries on where it can. Without HAVE_SELINUX this is a no-op.
 */
static void
relabel_dir_for_upper (const char *upper_path, const char *real_path, gboolean is_dir)
{
#ifdef HAVE_SELINUX
  /* A failed relabel is only reported; it does not stop the traversal. */
  if (selinux_restorecon (real_path, 0))
    g_printerr ("Failed to relabel %s\n", real_path);

  if (!is_dir)
    return;

  g_auto (GLnxDirFdIterator) dfd_iter = {
    0,
  };

  if (!glnx_dirfd_iterator_init_at (AT_FDCWD, upper_path, FALSE, &dfd_iter, NULL))
    {
      g_printerr ("Failed to open directory %s\n", upper_path);
      /* The iterator was never set up, so there is nothing valid to walk;
       * bail out instead of calling the next-dent helper on it. */
      return;
    }

  while (TRUE)
    {
      struct dirent *dent;

      if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, NULL, NULL))
        {
          g_printerr ("Failed to read directory %s\n", upper_path);
          break;
        }

      /* No further entry was returned: the directory has been fully listed. */
      if (dent == NULL)
        break;

      /* Build the matching pair of paths: the entry in the enumerated tree
       * and the same name under the path that gets relabeled. */
      g_autofree char *upper_child = g_build_filename (upper_path, dent->d_name, NULL);
      g_autofree char *real_child = g_build_filename (real_path, dent->d_name, NULL);
      relabel_dir_for_upper (upper_child, real_child, dent->d_type == DT_DIR);
    }
#endif
}

int
main (int argc, char *argv[])
{
Expand Down Expand Up @@ -119,6 +159,49 @@ main (int argc, char *argv[])
if (mount ("none", "/sysroot", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
perror ("warning: While remounting /sysroot MS_PRIVATE");

const char *transient_etc = NULL;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_TRANSIENT_ETC, "&s",
&transient_etc);

if (transient_etc)
{
/* Systemd will create a /run/machine-id -> /etc/machine-id bind mount if /etc is
* read-only, and then it will later replace this mount (if it exist) with a real one.
* We need to relabel the file on the overlayfs, below the bind mount, so we unmount
* the covering mount. However, we do so in a temporary private namespace to avoid
* affecting other parts of the system.
*/

glnx_autofd int initial_ns_fd = -1;
if (g_file_test ("/run/machine-id", G_FILE_TEST_EXISTS))
{
initial_ns_fd = open ("/proc/self/ns/mnt", O_RDONLY | O_NOCTTY | O_CLOEXEC);
if (initial_ns_fd < 0)
perror ("Failed to open initial namespace");

if (unshare (CLONE_NEWNS) < 0)
perror ("Failed to unshare initial namespace");

/* Ensure unmount is not propagated */
if (mount ("none", "/etc", NULL, MS_REC | MS_PRIVATE, NULL) < 0)
perror ("warning: While remounting /etc MS_PRIVATE");

if (umount2 ("/etc/machine-id", MNT_DETACH) < 0)
perror ("Failed to unmount machine-id");
}

/* If the initramfs created any files in /etc (directly or via overlay copy-up)
* they will be unlabeled, because the selinux policy is not loaded until after
* the pivot-root. So, for all files in the upper dir, relabel the corresponding
* overlay file.
*/
g_autofree char *upper = g_build_filename (transient_etc, "upper", NULL);
relabel_dir_for_upper (upper, "/etc", TRUE);

if (initial_ns_fd != -1 && setns (initial_ns_fd, CLONE_NEWNS) < 0)
perror ("Failed to join initial namespace");
}

gboolean root_is_composefs = FALSE;
g_variant_dict_lookup (ostree_run_metadata, OTCORE_RUN_BOOTED_KEY_COMPOSEFS, "b",
&root_is_composefs);
Expand All @@ -141,7 +224,7 @@ main (int argc, char *argv[])

/* And also make sure to make /etc rw again. We make this conditional on
* sysroot_configured_readonly because only in that case is it a bind-mount. */
if (sysroot_configured_readonly)
if (sysroot_configured_readonly && !transient_etc)
do_remount ("/etc", true);

/* If /var was created as as an OSTree default bind mount (instead of being a separate
Expand Down

0 comments on commit fb575ba

Please sign in to comment.