Skip to content

Commit

Permalink
pkg/rootless: simplify reexec for container code
Browse files Browse the repository at this point in the history
The code currently tried to avoid joining the userns from conmon
directly and rather joined to only read the pid file and then send this
back to use so we could join the userns. From the comment this was done
because we could not read the pid file. However this is no longer true
as of commit 49eb5af and file is no always owned by the real user.

This means we can just remove this special logic and join the namespace
directly there. A test has been added to check the rejoin logic with a
custom uidmapping.

Signed-off-by: Paul Holzinger <[email protected]>
  • Loading branch information
Luap99 committed Jul 8, 2024
1 parent c276b28 commit 3350cd3
Show file tree
Hide file tree
Showing 7 changed files with 24 additions and 134 deletions.
2 changes: 1 addition & 1 deletion pkg/domain/infra/abi/system_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -88,7 +88,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool,
}

if len(paths) > 0 {
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, true, paths)
became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths)
} else {
became, ret, err = rootless.BecomeRootInUserNS(pausePidPath)
if err == nil {
Expand Down
4 changes: 2 additions & 2 deletions pkg/rootless/rootless.go
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
return false, -1, err
}

became, ret, err := TryJoinFromFilePaths("", false, []string{pausePidPath})
became, ret, err := TryJoinFromFilePaths("", []string{pausePidPath})
if err == nil {
return became, ret, nil
}
Expand All @@ -45,7 +45,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) {
}()

// Now the pause PID file is locked. Try to join once again in case it changed while it was not locked.
became, ret, err = TryJoinFromFilePaths("", false, []string{pausePidPath})
became, ret, err = TryJoinFromFilePaths("", []string{pausePidPath})
if err != nil {
// It is still failing. We can safely remove it.
os.Remove(pausePidPath)
Expand Down
6 changes: 1 addition & 5 deletions pkg/rootless/rootless_freebsd.go
Original file line number Diff line number Diff line change
Expand Up @@ -38,11 +38,7 @@ func GetRootlessGID() int {
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}

Expand Down
50 changes: 1 addition & 49 deletions pkg/rootless/rootless_linux.c
Original file line number Diff line number Diff line change
Expand Up @@ -947,49 +947,8 @@ check_proc_sys_userns_file (const char *path)
}
}

static int
copy_file_to_fd (const char *file_to_read, int outfd)
{
char buf[512];
cleanup_close int fd = -1;

fd = open (file_to_read, O_RDONLY);
if (fd < 0)
{
fprintf (stderr, "open `%s`: %m\n", file_to_read);
return fd;
}

for (;;)
{
ssize_t r, w, t = 0;

r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf));
if (r < 0)
{
fprintf (stderr, "read from `%s`: %m\n", file_to_read);
return r;
}

if (r == 0)
break;

while (t < r)
{
w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t));
if (w < 0)
{
fprintf (stderr, "write file to output fd `%s`: %m\n", file_to_read);
return w;
}
t += w;
}
}
return 0;
}

int
reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd)
reexec_in_user_namespace (int ready, char *pause_pid_file_path)
{
cleanup_free char **argv = NULL;
cleanup_free char *argv0 = NULL;
Expand Down Expand Up @@ -1138,13 +1097,6 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re
_exit (EXIT_FAILURE);
}

if (file_to_read && file_to_read[0])
{
ret = copy_file_to_fd (file_to_read, outputfd);
close (outputfd);
_exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE);
}

execvp ("/proc/self/exe", argv);
fprintf (stderr, "failed to reexec: %m\n");

Expand Down
85 changes: 14 additions & 71 deletions pkg/rootless/rootless_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ import (
#include <sys/types.h>
extern uid_t rootless_uid();
extern uid_t rootless_gid();
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd);
extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path);
extern int reexec_in_user_namespace_wait(int pid, int options);
extern int reexec_userns_join(int pid, char *pause_pid_file_path);
extern int is_fd_inherited(int fd);
Expand Down Expand Up @@ -213,7 +213,7 @@ func copyMappings(from, to string) error {
return os.WriteFile(to, content, 0600)
}

func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) {
func becomeRootInUserNS(pausePid string) (_ bool, _ int, retErr error) {
hasCapSysAdmin, err := unshare.HasCapSysAdmin()
if err != nil {
return false, 0, err
Expand Down Expand Up @@ -249,13 +249,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
cPausePid := C.CString(pausePid)
defer C.free(unsafe.Pointer(cPausePid))

cFileToRead := C.CString(fileToRead)
defer C.free(unsafe.Pointer(cFileToRead))
var fileOutputFD C.int
if fileOutput != nil {
fileOutputFD = C.int(fileOutput.Fd())
}

runtime.LockOSThread()
defer runtime.UnlockOSThread()

Expand Down Expand Up @@ -287,7 +280,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
}
}()

pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD)
pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid)
pid = int(pidC)
if pid < 0 {
return false, -1, fmt.Errorf("cannot re-exec process")
Expand Down Expand Up @@ -361,14 +354,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo
return false, -1, fmt.Errorf("read from sync pipe: %w", err)
}

if fileOutput != nil {
ret := C.reexec_in_user_namespace_wait(pidC, 0)
if ret < 0 {
return false, -1, errors.New("waiting for the re-exec process")
}
return true, 0, nil
}

if b[0] == '2' {
// We have lost the race for writing the PID file, as probably another
// process created a namespace and wrote the PID.
Expand Down Expand Up @@ -434,69 +419,27 @@ func waitAndProxySignalsToChild(pid C.int) (bool, int, error) {
// If podman was re-executed the caller needs to propagate the error code returned by the child
// process.
func BecomeRootInUserNS(pausePid string) (bool, int, error) {
return becomeRootInUserNS(pausePid, "", nil)
return becomeRootInUserNS(pausePid)
}

// TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths.
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
var lastErr error
var pausePid int

for _, path := range paths {
if !needNewNamespace {
data, err := os.ReadFile(path)
if err != nil {
lastErr = err
continue
}

pausePid, err = strconv.Atoi(string(data))
if err != nil {
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
continue
}
} else {
r, w, err := os.Pipe()
if err != nil {
lastErr = err
continue
}

defer errorhandling.CloseQuiet(r)

if _, _, err := becomeRootInUserNS("", path, w); err != nil {
w.Close()
lastErr = err
continue
}

if err := w.Close(); err != nil {
return false, 0, err
}
defer func() {
C.reexec_in_user_namespace_wait(-1, 0)
}()

b := make([]byte, 32)

n, err := r.Read(b)
if err != nil {
lastErr = fmt.Errorf("cannot read %q: %w", path, err)
continue
}
data, err := os.ReadFile(path)
if err != nil {
lastErr = err
continue
}

pausePid, err = strconv.Atoi(string(b[:n]))
if err != nil {
lastErr = err
continue
}
pausePid, err := strconv.Atoi(string(data))
if err != nil {
lastErr = fmt.Errorf("cannot parse file %q: %w", path, err)
continue
}

if pausePid > 0 && unix.Kill(pausePid, 0) == nil {
Expand Down
6 changes: 1 addition & 5 deletions pkg/rootless/rootless_unsupported.go
Original file line number Diff line number Diff line change
Expand Up @@ -41,11 +41,7 @@ func GetRootlessGID() int {
// This is useful when there are already running containers and we
// don't have a pause process yet. We can use the paths to the conmon
// processes to attempt joining their namespaces.
// If needNewNamespace is set, the file is read from a temporary user
// namespace, this is useful for containers that are running with a
// different uidmap and the unprivileged user has no way to read the
// file owned by the root in the container.
func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) {
func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) {
return false, -1, errors.New("this function is not supported on this os")
}

Expand Down
5 changes: 4 additions & 1 deletion test/system/550-pause-process.bats
Original file line number Diff line number Diff line change
Expand Up @@ -119,7 +119,7 @@ function _check_pause_process() {

# First let's run a container in the background to keep the userns active
local cname1=c1_$(random_string)
run_podman run -d --name $cname1 $IMAGE top
run_podman run -d --name $cname1 --uidmap 0:100:100 $IMAGE top

run_podman unshare readlink /proc/self/ns/user
userns="$output"
Expand All @@ -136,6 +136,9 @@ function _check_pause_process() {

_test_sigproxy $cname2 $kidpid

# check pause process again
_check_pause_process

# our container exits 0 so podman should too
wait $kidpid || die "podman run exited $? instead of zero"

Expand Down

0 comments on commit 3350cd3

Please sign in to comment.