Skip to content

Commit

Permalink
restore: add infrastructure to enable shadow stack
Browse files Browse the repository at this point in the history
There are several gotchas when restoring a task with shadow stack:
* depending on the compiler options, glibc version and glibc tunables
  CRIU can run with or without shadow stack.
* shadow stack VMAs are special, they must be created using a dedicated
  map_shadow_stack() system call and can be modified only by a special
  instruction (wrss) that is only available when shadow stack is
  enabled.
* once shadow stack is enabled, it is not writable even with wrss;
  writes to shadow stack can be only enabled with ptrace() and only when
  shadow stack is enabled in the tracee.
* if the shadow stack is enabled during restore rather than by glibc,
  calling retq after arch_prctl() that enables the shadow stack causes
  #CP, so the function that enables shadow stack can never return.

Add the infrastructure required to cope with all of those:

* modify the restore code to allow a trampoline (arch_shstk_trampoline)
  that will enable shadow stack and call restore_task_with_children().
* add call to arch_shstk_unlock() right after the tasks are clone()ed;
  this will allow unlocking shadow stack features and making shadow
  stack writable.
* add stubs for architectures that do not support shadow stacks
* add implementation of arch_shstk_trampoline() and arch_shstk_unlock()
  for x86, but keep it disabled; it will be enabled along with the addition
  of the code that will restore shadow stack in the restorer blob

Signed-off-by: Mike Rapoport (IBM) <[email protected]>
  • Loading branch information
rppt committed Nov 30, 2023
1 parent 9a93472 commit 9513274
Show file tree
Hide file tree
Showing 5 changed files with 199 additions and 1 deletion.
32 changes: 32 additions & 0 deletions criu/arch/x86/include/asm/shstk.h
Original file line number Diff line number Diff line change
@@ -1,6 +1,29 @@
#ifndef __CR_ASM_SHSTK_H__
#define __CR_ASM_SHSTK_H__

/*
* Shadow stack constants from Linux
*
* They are duplicated here so that the code can be built against headers
* that do not provide them yet.
*/
/* arch/x86/include/uapi/asm/mman.h */
#ifndef SHADOW_STACK_SET_TOKEN
#define SHADOW_STACK_SET_TOKEN 0x1 /* Set up a restore token in the shadow stack */
#endif

/* arch/x86/include/uapi/asm/prctl.h */
/* Operation codes passed to arch_prctl() (or PTRACE_ARCH_PRCTL) */
#define ARCH_SHSTK_ENABLE 0x5001
#define ARCH_SHSTK_DISABLE 0x5002
#define ARCH_SHSTK_LOCK 0x5003
#define ARCH_SHSTK_UNLOCK 0x5004
#define ARCH_SHSTK_STATUS 0x5005

/* Feature bits the operations above act upon */
#define ARCH_SHSTK_SHSTK (1ULL << 0)
#define ARCH_SHSTK_WRSS (1ULL << 1)

/* NOTE(review): presumably tested by generic code to detect shadow stack support - confirm */
#define ARCH_HAS_SHSTK

/* from arch/x86/kernel/shstk.c */
#define SHSTK_DATA_BIT (1UL << 63) /* BIT(63) */

/*
* Shadow stack memory cannot be restored with memcpy/pread but only using
* a special instruction that can write to shadow stack.
Expand Down Expand Up @@ -43,4 +66,13 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,
struct task_restore_args *ta);
#define arch_shstk_prepare arch_shstk_prepare

/*
 * The x86 implementations of arch_shstk_unlock() and
 * arch_shstk_trampoline() are intentionally kept disabled for now; while
 * this block is compiled out the generic code falls back to its no-op
 * stubs. They are to be enabled together with the code that restores
 * the shadow stack in the restorer blob.
 */
#if 0
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid);
#define arch_shstk_unlock arch_shstk_unlock

int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
int (*func)(void *arg), void *arg);
#define arch_shstk_trampoline arch_shstk_trampoline
#endif

#endif /* __CR_ASM_SHSTK_H__ */
133 changes: 133 additions & 0 deletions criu/arch/x86/shstk.c
Original file line number Diff line number Diff line change
@@ -1,3 +1,6 @@
#include <sys/ptrace.h>
#include <sys/wait.h>

#include <common/list.h>

#include <compel/cpu.h>
Expand Down Expand Up @@ -88,3 +91,133 @@ int arch_shstk_prepare(struct pstree_item *item, CoreEntry *core,

return 0;
}

/*
 * Unlock the shadow stack features of a freshly created task.
 *
 * Runs in the parent: it waits until the child signals via the
 * shstk_enable futex that it reached the synchronization point, seizes
 * and interrupts the child with ptrace, asks the kernel to unlock
 * ARCH_SHSTK_SHSTK and ARCH_SHSTK_WRSS for it, detaches and finally
 * releases the child through the shstk_unlock futex.
 *
 * @item: process tree entry of the child
 * @core: core image entry of the child
 * @pid:  real pid of the child
 *
 * Returns 0 on success or when there is nothing to do, -1 if any of the
 * ptrace/wait steps failed. The child is woken up in either case once
 * the handshake was started, so it never stays blocked on shstk_unlock.
 */
int arch_shstk_unlock(struct pstree_item *item, CoreEntry *core, pid_t pid)
{
	unsigned long features;
	int status;
	int unlocked = -1; /* outcome of the interrupt/wait/unlock sequence */
	int ret = -1;

	/*
	 * CRIU runs with no shadow stack and the task does not need one,
	 * nothing to do.
	 */
	if (!kdat.has_shstk && !task_needs_shstk(item, core))
		return 0;

	futex_wait_until(&rsti(item)->shstk_enable, 1);

	if (ptrace(PTRACE_SEIZE, pid, 0, 0)) {
		pr_perror("Cannot attach to %d", pid);
		goto futex_wake;
	}

	if (ptrace(PTRACE_INTERRUPT, pid, 0, 0)) {
		pr_perror("Cannot interrupt the %d task", pid);
		goto detach;
	}

	if (wait4(pid, &status, __WALL, NULL) != pid) {
		pr_perror("waitpid(%d) failed", pid);
		goto detach;
	}

	features = ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS;
	if (ptrace(PTRACE_ARCH_PRCTL, pid, features, ARCH_SHSTK_UNLOCK)) {
		pr_perror("Cannot unlock CET for %d task", pid);
		goto detach;
	}

	unlocked = 0;

detach:
	/* always try to detach, even when an earlier step has failed */
	if (ptrace(PTRACE_DETACH, pid, NULL, 0)) {
		pr_perror("Unable to detach %d", pid);
		goto futex_wake;
	}

	/*
	 * Report success only if both the unlock sequence and the detach
	 * went fine; a failure before the detach must not be masked by a
	 * successful detach.
	 */
	ret = unlocked;

futex_wake:
	futex_set_and_wake(&rsti(item)->shstk_unlock, 1);

	return ret;
}

static void shstk_sync_unlock(struct pstree_item *item)
{
/* notify parent that shadow stack is enabled ... */
futex_set_and_wake(&rsti(item)->shstk_enable, 1);

/* ... and wait until it unlocks its features with ptrace */
futex_wait_until(&rsti(item)->shstk_unlock, 1);
}

/*
 * Second half of the trampoline: synchronize with the parent, run
 * @func(@arg) and terminate the process with its result as the exit
 * code. This function never returns to its caller.
 */
static void __arch_shstk_enable(struct pstree_item *item,
				int (*func)(void *arg), void *arg)
{
	shstk_sync_unlock(item);

	/*
	 * Returning from here would raise #CP, so the result of func() is
	 * handed straight to exit() instead.
	 */
	exit(func(arg));
}

/*
 * Turn the shadow stack off in the current task.
 *
 * Synchronizes with the parent first (so that the parent has unlocked
 * the shadow stack features), disables ARCH_SHSTK_SHSTK and then locks
 * both ARCH_SHSTK_SHSTK and ARCH_SHSTK_WRSS again.
 *
 * Returns 0 on success, -1 if one of the arch_prctl() calls failed.
 */
static int shstk_disable(struct pstree_item *item)
{
	shstk_sync_unlock(item);

	/* disable shadow stack, implicitly clears ARCH_SHSTK_WRSS */
	if (syscall(__NR_arch_prctl, ARCH_SHSTK_DISABLE, ARCH_SHSTK_SHSTK)) {
		/* no trailing newline, like the other pr_perror() callers */
		pr_perror("Failed to disable shadow stack");
		return -1;
	}

	if (syscall(__NR_arch_prctl, ARCH_SHSTK_LOCK,
		    ARCH_SHSTK_SHSTK | ARCH_SHSTK_WRSS)) {
		pr_perror("Failed to lock shadow stack controls");
		return -1;
	}

	return 0;
}

/*
 * Run @func(@arg) with the shadow stack state the restored task needs.
 *
 * - If the task does not need a shadow stack, it is disabled first when
 *   CRIU itself runs with one, and @func is called normally; its return
 *   value (or -1 if disabling failed) is returned.
 * - If the task needs a shadow stack, it is enabled here and @func is
 *   run via __arch_shstk_enable(), which exits the process with the
 *   result of @func. In that case this function does not return.
 *
 * @item: process tree entry of the task being restored
 * @core: core image entry of the task
 * @func: function to run once the shadow stack state is set up
 * @arg:  opaque argument passed to @func
 *
 * If enabling the shadow stack fails, the raw (non-zero) result of the
 * arch_prctl system call is returned.
 */
int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
			  int (*func)(void *arg), void *arg)
{
	unsigned long features = ARCH_SHSTK_SHSTK;
	unsigned long code = ARCH_SHSTK_ENABLE;
	long ret;

	/*
	 * If task does not need shadow stack but CRIU runs with shadow
	 * stack enabled, we should disable it before continuing with
	 * restore
	 */
	if (!task_needs_shstk(item, core)) {
		if (kdat.has_shstk && shstk_disable(item))
			return -1;
		return func(arg);
	}

	/*
	 * Calling sys_arch_prctl() means there will be use of retq
	 * instruction after shadow stack is enabled and this will cause
	 * Control Protection fault. Open code sys_arch_prctl() in
	 * assembly.
	 *
	 * The syscall number goes in %rax, code and features in %rdi and
	 * %rsi. The syscall instruction overwrites %rcx and %r11 and the
	 * kernel changes task state behind the compiler's back, so all of
	 * that is declared as clobbered.
	 */
	asm volatile("syscall"
		     : "=a"(ret)
		     : "0"((long)__NR_arch_prctl), "D"(code), "S"(features)
		     : "rcx", "r11", "memory");

	/*
	 * On failure the shadow stack was not enabled, so a regular return
	 * through the compiler generated epilogue is safe; a bare retq
	 * inside the asm would skip the epilogue and pop whatever the
	 * prologue left on the stack.
	 *
	 * NOTE(review): the parent may be blocked on the shstk_enable futex
	 * at this point and is not woken up on this path - confirm how the
	 * caller handles a child failing here.
	 */
	if (ret)
		return (int)ret;

	__arch_shstk_enable(item, func, arg);

	/* never reached */
	return -1;
}
14 changes: 13 additions & 1 deletion criu/cr-restore.c
Original file line number Diff line number Diff line change
Expand Up @@ -1498,6 +1498,8 @@ static inline int fork_with_pid(struct pstree_item *item)
pr_debug("PID: real %d virt %d\n", item->pid->real, vpid(item));
}

arch_shstk_unlock(item, ca.core, pid);

err_unlock:
if (!(ca.clone_flags & CLONE_NEWPID))
unlock_last_pid();
Expand Down Expand Up @@ -1764,7 +1766,7 @@ static int create_children_and_session(void)
return 0;
}

static int restore_task_with_children(void *_arg)
static int __restore_task_with_children(void *_arg)
{
struct cr_clone_arg *ca = _arg;
pid_t pid;
Expand Down Expand Up @@ -1956,6 +1958,16 @@ static int restore_task_with_children(void *_arg)
exit(1);
}

/*
 * Entry point of a restored task: hand the real work,
 * __restore_task_with_children(), over to the architecture trampoline
 * so that it runs with the shadow stack state set up for this task.
 *
 * @_arg points to the struct cr_clone_arg describing the task.
 */
static int restore_task_with_children(void *_arg)
{
	struct cr_clone_arg *ca = _arg;

	return arch_shstk_trampoline(ca->item, ca->core,
				     __restore_task_with_children, ca);
}

static int attach_to_tasks(bool root_seized)
{
struct pstree_item *item;
Expand Down
18 changes: 18 additions & 0 deletions criu/include/restore.h
Original file line number Diff line number Diff line change
Expand Up @@ -20,4 +20,22 @@ static inline int arch_shstk_prepare(struct pstree_item *item,
#define arch_shstk_prepare arch_shstk_prepare
#endif

#ifndef arch_shstk_unlock
/*
 * Fallback used when the architecture does not provide its own
 * arch_shstk_unlock(): there is nothing to unlock, report success.
 */
static inline int arch_shstk_unlock(struct pstree_item *item,
				    CoreEntry *core, pid_t pid)
{
	(void)item;
	(void)core;
	(void)pid;

	return 0;
}
#define arch_shstk_unlock arch_shstk_unlock
#endif

#ifndef arch_shstk_trampoline
/*
 * Fallback used when the architecture does not provide its own
 * arch_shstk_trampoline(): simply call @func with @arg and pass its
 * result back to the caller.
 */
static inline int arch_shstk_trampoline(struct pstree_item *item, CoreEntry *core,
					int (*func)(void *arg), void *arg)
{
	int rc;

	(void)item;
	(void)core;

	rc = func(arg);
	return rc;
}
#define arch_shstk_trampoline arch_shstk_trampoline
#endif

#endif
3 changes: 3 additions & 0 deletions criu/include/rst_info.h
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,9 @@ struct rst_info {

struct rst_rseq *rseqe;

futex_t shstk_enable;
futex_t shstk_unlock;

void *breakpoint;
};

Expand Down

0 comments on commit 9513274

Please sign in to comment.