-
Notifications
You must be signed in to change notification settings - Fork 305
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
In the OSTree model, executables go in `/usr`, state in `/var` and configuration in `/etc`. Software that lives in `/opt`, however, messes this up because it often mixes code *and* state, making it harder to manage. More generally, it's sometimes useful to have the OSTree commit contain code under a certain path, but still allow that path to be writable by software and the sysadmin at runtime (`/usr/local` is another instance). Add the concept of state overlays. A state overlay is an overlayfs mount whose upper directory, which contains unmanaged state, is carried forward on top of a lower directory, containing OSTree-managed files. In the example of `/usr/local`, OSTree commits can ship content there, all while allowing users to e.g. add scripts in `/usr/local/bin` when booted into that commit. Some reconciliation logic is executed whenever the base is updated so that newer files in the base are never shadowed by a copied up version in the upper directory. This matches RPM semantics when upgrading packages whose files may have been modified. For ease of integration, this is exposed as a systemd template unit which any downstream distro/user can enable. The instance name is the mountpath in escaped systemd path notation (e.g. `ostree-state-overlay@usr-local.service`). See discussions in #3113 for more details.
- Loading branch information
Showing
7 changed files
with
430 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -42,6 +42,7 @@ systemdsystemunit_DATA = src/boot/ostree-prepare-root.service \ | |
src/boot/ostree-finalize-staged.service \ | ||
src/boot/ostree-finalize-staged.path \ | ||
src/boot/ostree-finalize-staged-hold.service \ | ||
src/boot/ostree-state-overlay@.service \ | ||
$(NULL) | ||
systemdtmpfilesdir = $(prefix)/lib/tmpfiles.d | ||
dist_systemdtmpfiles_DATA = src/boot/ostree-tmpfiles.conf | ||
|
@@ -72,6 +73,7 @@ EXTRA_DIST += src/boot/dracut/module-setup.sh \ | |
src/boot/ostree-remount.service \ | ||
src/boot/ostree-finalize-staged.service \ | ||
src/boot/ostree-finalize-staged-hold.service \ | ||
src/boot/ostree-state-overlay@.service \ | ||
src/boot/grub2/grub2-15_ostree \ | ||
src/boot/grub2/ostree-grub-generator \ | ||
$(NULL) |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,36 @@ | ||
# Copyright (C) 2023 Red Hat Inc. | ||
# | ||
# This library is free software; you can redistribute it and/or | ||
# modify it under the terms of the GNU Lesser General Public | ||
# License as published by the Free Software Foundation; either | ||
# version 2 of the License, or (at your option) any later version. | ||
# | ||
# This library is distributed in the hope that it will be useful, | ||
# but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
# Lesser General Public License for more details. | ||
# | ||
# You should have received a copy of the GNU Lesser General Public | ||
# License along with this library. If not, see <https://www.gnu.org/licenses/>. | ||
|
||
[Unit] | ||
Description=OSTree State Overlay On /%I | ||
Documentation=man:ostree(1) | ||
DefaultDependencies=no | ||
ConditionKernelCommandLine=ostree | ||
# run after /var is setup since that's where the upperdir is stored | ||
# and after boot.mount so we can load the sysroot | ||
After=var.mount boot.mount | ||
# but before local-fs.target, which we consider ourselves a part of | ||
Before=local-fs.target | ||
|
||
[Service] | ||
Type=oneshot | ||
RemainAfterExit=yes | ||
ExecStart=/usr/bin/ostree admin state-overlay %i /%I | ||
StandardInput=null | ||
StandardOutput=journal | ||
StandardError=journal+console | ||
|
||
[Install] | ||
WantedBy=local-fs.target |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,242 @@ | ||
/* Copyright (C) 2023 Red Hat, Inc. | ||
* | ||
* SPDX-License-Identifier: LGPL-2.0+ | ||
* | ||
* This library is free software; you can redistribute it and/or | ||
* modify it under the terms of the GNU Lesser General Public | ||
* License as published by the Free Software Foundation; either | ||
* version 2 of the License, or (at your option) any later version. | ||
* | ||
* This library is distributed in the hope that it will be useful, | ||
* but WITHOUT ANY WARRANTY; without even the implied warranty of | ||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU | ||
* Lesser General Public License for more details. | ||
* | ||
* You should have received a copy of the GNU Lesser General Public | ||
* License along with this library. If not, see <https://www.gnu.org/licenses/>. | ||
*/ | ||
|
||
#include "config.h" | ||
|
||
#include <fcntl.h> | ||
#include <glib-unix.h> | ||
#include <sched.h> | ||
#include <stdlib.h> | ||
#include <sys/mount.h> | ||
|
||
#include "glnx-errors.h" | ||
#include "glnx-fdio.h" | ||
#include "glnx-local-alloc.h" | ||
#include "glnx-shutil.h" | ||
#include "glnx-xattrs.h" | ||
#include "ot-admin-builtins.h" | ||
|
||
/* Parent directory under which each overlay gets its own NAME subdirectory. */
#define OSTREE_STATEOVERLAYS_DIR "/var/ostree/state-overlays" | ||
/* Per-overlay subdirectory passed to overlayfs as upperdir=. */
#define OSTREE_STATEOVERLAY_UPPER_DIR "upper" | ||
/* Per-overlay subdirectory passed to overlayfs as workdir=. */
#define OSTREE_STATEOVERLAY_WORK_DIR "work" | ||
|
||
/* Name of the xattr inspected to decide whether an upperdir directory is
 * opaque (a one-byte value of 'y' is treated as opaque). */
/* https://www.kernel.org/doc/html/latest/filesystems/overlayfs.html */ | ||
#define OVERLAYFS_DIR_XATTR_OPAQUE "trusted.overlay.opaque" | ||
|
||
/* This builtin defines no options of its own; the table holds only the
 * terminating entry. */
static GOptionEntry options[] = { { NULL } }; | ||
|
||
static gboolean | ||
ensure_overlay_dirs (const char *overlay_dir, int *out_overlay_dfd, GCancellable *cancellable, | ||
GError **error) | ||
{ | ||
glnx_autofd int overlay_dfd = -1; | ||
if (!glnx_shutil_mkdir_p_at_open (AT_FDCWD, overlay_dir, 0755, &overlay_dfd, cancellable, error)) | ||
return FALSE; | ||
|
||
if (!glnx_shutil_mkdir_p_at (overlay_dfd, OSTREE_STATEOVERLAY_WORK_DIR, 0755, cancellable, error)) | ||
return FALSE; | ||
if (!glnx_shutil_mkdir_p_at (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, 0755, cancellable, error)) | ||
return FALSE; | ||
|
||
*out_overlay_dfd = glnx_steal_fd (&overlay_dfd); | ||
return TRUE; | ||
} | ||
|
||
static gboolean | ||
is_opaque_dir (int dfd, const char *dname, gboolean *out_is_opaque, GError **error) | ||
{ | ||
/* XXX: this is basically like a `glnx_lgetxattrat_allow_noent()`; upstream it */ | ||
|
||
char pathbuf[PATH_MAX]; | ||
snprintf (pathbuf, sizeof (pathbuf), "/proc/self/fd/%d/%s", dfd, dname); | ||
|
||
ssize_t bytes_read, real_size; | ||
if (TEMP_FAILURE_RETRY (bytes_read = lgetxattr (pathbuf, OVERLAYFS_DIR_XATTR_OPAQUE, NULL, 0)) | ||
< 0) | ||
{ | ||
if (errno != ENODATA) | ||
return glnx_throw_errno_prefix (error, "lgetxattr(%s)", OVERLAYFS_DIR_XATTR_OPAQUE); | ||
*out_is_opaque = FALSE; | ||
return TRUE; | ||
} | ||
|
||
g_autofree guint8 *buf = g_malloc (bytes_read); | ||
if (TEMP_FAILURE_RETRY (real_size | ||
= lgetxattr (pathbuf, OVERLAYFS_DIR_XATTR_OPAQUE, buf, bytes_read)) | ||
< 0) | ||
return glnx_throw_errno_prefix (error, "lgetxattr(%s)", OVERLAYFS_DIR_XATTR_OPAQUE); | ||
|
||
*out_is_opaque = (real_size == 1 && buf[0] == 'y'); | ||
return TRUE; | ||
} | ||
|
||
static gboolean | ||
prune_upperdir_recurse (int lower_dfd, int upper_dfd, GCancellable *cancellable, GError **error) | ||
{ | ||
g_auto (GLnxDirFdIterator) dfd_iter = { 0 }; | ||
if (!glnx_dirfd_iterator_init_at (upper_dfd, ".", FALSE, &dfd_iter, error)) | ||
return FALSE; | ||
|
||
while (TRUE) | ||
{ | ||
struct dirent *dent = NULL; | ||
if (!glnx_dirfd_iterator_next_dent_ensure_dtype (&dfd_iter, &dent, cancellable, error)) | ||
return FALSE; | ||
if (dent == NULL) | ||
break; | ||
|
||
/* do we have an entry of the same name in the lowerdir? */ | ||
struct stat stbuf; | ||
if (!glnx_fstatat_allow_noent (lower_dfd, dent->d_name, &stbuf, AT_SYMLINK_NOFOLLOW, error)) | ||
return FALSE; | ||
if (errno == ENOENT) | ||
continue; /* state file (i.e. upperdir only); carry on */ | ||
|
||
/* ok, it shadows; are they both directories? */ | ||
if (dent->d_type == DT_DIR && S_ISDIR (stbuf.st_mode)) | ||
{ | ||
/* is the directory opaque? this stmt expr brought to you by the Rust lobbying group */ | ||
gboolean is_opaque = FALSE; | ||
if (!is_opaque_dir (upper_dfd, dent->d_name, &is_opaque, error)) | ||
return FALSE; | ||
|
||
if (!is_opaque) | ||
{ | ||
/* recurse */ | ||
glnx_autofd int lower_subdfd = -1; | ||
if (!glnx_opendirat (lower_dfd, dent->d_name, FALSE, &lower_subdfd, error)) | ||
return FALSE; | ||
glnx_autofd int upper_subdfd = -1; | ||
if (!glnx_opendirat (upper_dfd, dent->d_name, FALSE, &upper_subdfd, error)) | ||
return FALSE; | ||
if (!prune_upperdir_recurse (lower_subdfd, upper_subdfd, cancellable, error)) | ||
return glnx_prefix_error (error, "in %s", dent->d_name); | ||
|
||
continue; | ||
} | ||
|
||
/* fallthrough; implicitly delete opaque directories */ | ||
} | ||
|
||
/* any other case, we prune (this also implicitly covers whiteouts and opaque dirs) */ | ||
if (dent->d_type == DT_DIR) | ||
{ | ||
if (!glnx_shutil_rm_rf_at (upper_dfd, dent->d_name, cancellable, error)) | ||
return FALSE; | ||
} | ||
/* just unlinkat(); saves one openat() call */ | ||
else if (!glnx_unlinkat (upper_dfd, dent->d_name, 0, error)) | ||
return FALSE; | ||
} | ||
|
||
return TRUE; | ||
} | ||
|
||
static gboolean | ||
prune_upperdir (int sysroot_fd, const char *deployment_path, const char *mountpath, int overlay_dfd, | ||
GCancellable *cancellable, GError **error) | ||
{ | ||
glnx_autofd int lower_dfd = -1; | ||
if (!glnx_opendirat (AT_FDCWD, mountpath, FALSE, &lower_dfd, error)) | ||
return FALSE; | ||
|
||
glnx_autofd int upper_dfd = -1; | ||
if (!glnx_opendirat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, FALSE, &upper_dfd, error)) | ||
return FALSE; | ||
|
||
if (!prune_upperdir_recurse (lower_dfd, upper_dfd, cancellable, error)) | ||
return FALSE; | ||
|
||
/* touch upperdir to mark prune as completed */ | ||
if (utimensat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, NULL, 0) < 0) | ||
return glnx_throw_errno_prefix (error, "futimens(upper)"); | ||
|
||
return TRUE; | ||
} | ||
|
||
static gboolean | ||
mount_overlay (const char *mountpath, const char *name, GError **error) | ||
{ | ||
/* we could use /proc/self/... with overlay_dfd to avoid these allocations, | ||
* but this gets stringified into the options field in the mount table, and | ||
* being cryptic is not helpful */ | ||
g_autofree char *upperdir | ||
= g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, OSTREE_STATEOVERLAY_UPPER_DIR, NULL); | ||
g_autofree char *workdir | ||
= g_build_filename (OSTREE_STATEOVERLAYS_DIR, name, OSTREE_STATEOVERLAY_WORK_DIR, NULL); | ||
g_autofree char *ovl_options | ||
= g_strdup_printf ("lowerdir=%s,upperdir=%s,workdir=%s", mountpath, upperdir, workdir); | ||
if (mount ("overlay", mountpath, "overlay", MS_SILENT, ovl_options) < 0) | ||
return glnx_throw_errno_prefix (error, "mount(%s)", mountpath); | ||
|
||
return TRUE; | ||
} | ||
|
||
/* Called by [email protected]. */ | ||
gboolean | ||
ot_admin_builtin_state_overlay (int argc, char **argv, OstreeCommandInvocation *invocation, | ||
GCancellable *cancellable, GError **error) | ||
{ | ||
g_autoptr (GOptionContext) context = g_option_context_new ("NAME MOUNTPATH"); | ||
g_autoptr (OstreeSysroot) sysroot = NULL; | ||
|
||
/* First parse the args without loading the sysroot to see what options are | ||
* set. */ | ||
if (!ostree_admin_option_context_parse (context, options, &argc, &argv, | ||
OSTREE_ADMIN_BUILTIN_FLAG_NONE, invocation, &sysroot, | ||
cancellable, error)) | ||
return FALSE; | ||
|
||
if (argc < 3) | ||
return glnx_throw (error, "Missing NAME or MOUNTPATH"); | ||
|
||
/* Sanity-check */ | ||
OstreeDeployment *booted_deployment = ostree_sysroot_get_booted_deployment (sysroot); | ||
if (booted_deployment == NULL) | ||
return glnx_throw (error, "Must be booted into an OSTree deployment"); | ||
|
||
const char *overlay_name = argv[1]; | ||
const char *mountpath = argv[2]; | ||
|
||
glnx_autofd int overlay_dfd = -1; | ||
g_autofree char *overlay_dir = g_build_filename (OSTREE_STATEOVERLAYS_DIR, overlay_name, NULL); | ||
if (!ensure_overlay_dirs (overlay_dir, &overlay_dfd, cancellable, error)) | ||
return FALSE; | ||
|
||
struct stat stbuf_upper; | ||
if (!glnx_fstatat (overlay_dfd, OSTREE_STATEOVERLAY_UPPER_DIR, &stbuf_upper, 0, error)) | ||
return FALSE; | ||
|
||
/* We don't use "/" directly here because that may have e.g. an overlay | ||
* slapped on from root.transient or composefs. */ | ||
g_autofree char *deployment_path | ||
= ostree_sysroot_get_deployment_dirpath (sysroot, booted_deployment); | ||
struct stat stbuf_lower; | ||
if (!glnx_fstatat (ostree_sysroot_get_fd (sysroot), deployment_path, &stbuf_lower, 0, error)) | ||
return FALSE; | ||
|
||
if (stbuf_upper.st_mtime < stbuf_lower.st_mtime) | ||
{ | ||
/* the lowerdir was updated; prune the upperdir */ | ||
if (!prune_upperdir (ostree_sysroot_get_fd (sysroot), deployment_path, mountpath, overlay_dfd, | ||
cancellable, error)) | ||
return glnx_prefix_error (error, "Pruning upperdir for %s", overlay_name); | ||
} | ||
|
||
return mount_overlay (mountpath, overlay_name, error); | ||
} |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.