From 640c4a079bd9fd4591026e76835e0e938f0bda9e Mon Sep 17 00:00:00 2001 From: lifubang Date: Wed, 30 Oct 2024 01:08:32 +0800 Subject: [PATCH] try joining the namespaces twice We should first join as many namespaces as possible, except the user namespace, because some ns paths may not be owned by the user namespace we want to join; we can then join the remaining namespaces after we join/unshare the user ns. Please see #4390. Signed-off-by: lifubang --- libcontainer/nsenter/nsexec.c | 72 +++++++++++++++++++++++++---------- 1 file changed, 51 insertions(+), 21 deletions(-) diff --git a/libcontainer/nsenter/nsexec.c b/libcontainer/nsenter/nsexec.c index 565b2ca2030..627ddc40e65 100644 --- a/libcontainer/nsenter/nsexec.c +++ b/libcontainer/nsenter/nsexec.c @@ -95,6 +95,12 @@ struct nlconfig_t { size_t timensoffset_len; }; +struct namespace_t { + int fd; + char type[PATH_MAX]; + char path[PATH_MAX]; +}; + /* * List of netlink message types sent to us as part of bootstrapping the init. * These constants are defined in libcontainer/message_linux.go. @@ -444,16 +450,11 @@ void nl_free(struct nlconfig_t *config) free(config->data); } -void join_namespaces(char *nslist) +struct namespace_t * init_namespaces(char *nslist, int *num) { - int num = 0, i; char *saveptr = NULL; char *namespace = strtok_r(nslist, ",", &saveptr); - struct namespace_t { - int fd; - char type[PATH_MAX]; - char path[PATH_MAX]; - } *namespaces = NULL; + struct namespace_t *namespaces = NULL; if (!namespace || !strlen(namespace) || !strlen(nslist)) bail("ns paths are empty"); @@ -469,10 +470,10 @@ void join_namespaces(char *nslist) struct namespace_t *ns; /* Resize the namespace array. */ - namespaces = realloc(namespaces, ++num * sizeof(struct namespace_t)); + namespaces = realloc(namespaces, ++*num * sizeof(struct namespace_t)); if (!namespaces) bail("failed to reallocate namespace array"); - ns = &namespaces[num - 1]; + ns = &namespaces[*num - 1]; /* Split 'ns:path'. 
*/ path = strstr(namespace, ":"); @@ -490,20 +491,40 @@ void join_namespaces(char *nslist) ns->path[PATH_MAX - 1] = '\0'; } while ((namespace = strtok_r(NULL, ",", &saveptr)) != NULL); - /* - * The ordering in which we join namespaces is important. We should - * always join the user namespace *first*. This is all guaranteed - * from the container_linux.go side of this, so we're just going to - * follow the order given to us. - */ + return namespaces; +} + +void join_namespaces(struct namespace_t *namespaces, int num, bool ignoreError) { + int i; for (i = 0; i < num; i++) { struct namespace_t *ns = &namespaces[i]; int flag = nsflag(ns->type); + if (ns->fd < 0) + continue; + + /* + * The ordering in which we join namespaces is important. We should join + * as many namespaces as possible *first* except the user namespace, + * because there may be some ns paths that are not owned by the user namespace + * we want to join, then we can join remaining namespaces after we + * join/unshare user ns. (#4390) + * + * When we join remaining namespaces or for rootless container, we should + * always join the user namespace *first*. This is all guaranteed from the + * container_linux.go side of this, so we're just going to follow the order + * given to us. 
+ */ + if (ignoreError && flag == CLONE_NEWUSER) + continue; + write_log(DEBUG, "setns(%#x) into %s namespace (with path %s)", flag, ns->type, ns->path); - if (setns(ns->fd, flag) < 0) - bail("failed to setns into %s namespace", ns->type); + if (setns(ns->fd, flag) < 0) { + if (!ignoreError) + bail("failed to setns into %s namespace", ns->type); + continue; + } /* * If we change user namespaces, make sure we switch to root in the @@ -517,9 +538,8 @@ void join_namespaces(char *nslist) } close(ns->fd); + ns->fd = -1; } - - free(namespaces); } static inline int sane_kill(pid_t pid, int signum) @@ -840,6 +860,8 @@ void nsexec(void) case STAGE_CHILD:{ pid_t stage2_pid = -1; enum sync_t s; + int nslen = 0; + struct namespace_t *namespaces = NULL; /* For debugging. */ current_stage = STAGE_CHILD; @@ -859,8 +881,11 @@ void nsexec(void) * [stage 2: STAGE_INIT]) would be meaningless). We could send it * using cmsg(3) but that's just annoying. */ - if (config.namespaces) - join_namespaces(config.namespaces); + if (config.namespaces) { + namespaces = init_namespaces(config.namespaces, &nslen); + if (nslen > 0) + join_namespaces(namespaces, nslen, !config.is_rootless_euid); + } /* * Deal with user namespaces first. They are quite special, as they @@ -923,6 +948,11 @@ void nsexec(void) if (setresuid(0, 0, 0) < 0) bail("failed to become root in user namespace"); } + /* Join remaining namespaces after we join/unshare user ns. */ + if (nslen > 0) { + join_namespaces(namespaces, nslen, false); + free(namespaces); + } /* * Unshare all of the namespaces. Now, it should be noted that this