diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index fe36ad88a7..6dd99554cd 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -30,15 +30,16 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } } - configureCgroup := cgroupMode != "disabled" - if configureCgroup { + hasCapSysAdmin, err := unshare.HasCapSysAdmin() + if err != nil { + return err + } + + // check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set. + if os.Geteuid() == 0 && hasCapSysAdmin { // do it only after podman has already re-execed and running with uid==0. - hasCapSysAdmin, err := unshare.HasCapSysAdmin() - if err != nil { - return err - } - // check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set. - if os.Geteuid() == 0 && hasCapSysAdmin { + configureCgroup := cgroupMode != "disabled" + if configureCgroup { ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() if err != nil { logrus.Infof("Failed to detect the owner for the current cgroup: %v", err) @@ -55,8 +56,8 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } } } - return nil } + return nil } pausePidPath, err := util.GetRootlessPauseProcessPidPath() @@ -88,7 +89,7 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, } if len(paths) > 0 { - became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, true, paths) + became, ret, err = rootless.TryJoinFromFilePaths(pausePidPath, paths) } else { became, ret, err = rootless.BecomeRootInUserNS(pausePidPath) if err == nil { diff --git a/pkg/rootless/rootless.go b/pkg/rootless/rootless.go index 3d654b1f35..fd906e96d7 100644 --- a/pkg/rootless/rootless.go +++ b/pkg/rootless/rootless.go @@ -24,7 +24,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) { return false, -1, err } - became, ret, err := TryJoinFromFilePaths("", false, []string{pausePidPath}) + became, ret, err := TryJoinFromFilePaths("", []string{pausePidPath}) if err == nil { return became, ret, nil } @@ -45,7 +45,7 @@ func TryJoinPauseProcess(pausePidPath string) (bool, int, error) { }() // Now the pause PID file is locked. Try to join once again in case it changed while it was not locked. - became, ret, err = TryJoinFromFilePaths("", false, []string{pausePidPath}) + became, ret, err = TryJoinFromFilePaths("", []string{pausePidPath}) if err != nil { // It is still failing. We can safely remove it. os.Remove(pausePidPath) diff --git a/pkg/rootless/rootless_freebsd.go b/pkg/rootless/rootless_freebsd.go index 28c1a5e15d..3d88d05225 100644 --- a/pkg/rootless/rootless_freebsd.go +++ b/pkg/rootless/rootless_freebsd.go @@ -38,11 +38,7 @@ func GetRootlessGID() int { // This is useful when there are already running containers and we // don't have a pause process yet. We can use the paths to the conmon // processes to attempt joining their namespaces. -// If needNewNamespace is set, the file is read from a temporary user -// namespace, this is useful for containers that are running with a -// different uidmap and the unprivileged user has no way to read the -// file owned by the root in the container. -func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { +func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } diff --git a/pkg/rootless/rootless_linux.c b/pkg/rootless/rootless_linux.c index 6e4702946b..ddb97b3346 100644 --- a/pkg/rootless/rootless_linux.c +++ b/pkg/rootless/rootless_linux.c @@ -947,49 +947,8 @@ check_proc_sys_userns_file (const char *path) } } -static int -copy_file_to_fd (const char *file_to_read, int outfd) -{ - char buf[512]; - cleanup_close int fd = -1; - - fd = open (file_to_read, O_RDONLY); - if (fd < 0) - { - fprintf (stderr, "open `%s`: %m\n", file_to_read); - return fd; - } - - for (;;) - { - ssize_t r, w, t = 0; - - r = TEMP_FAILURE_RETRY (read (fd, buf, sizeof buf)); - if (r < 0) - { - fprintf (stderr, "read from `%s`: %m\n", file_to_read); - return r; - } - - if (r == 0) - break; - - while (t < r) - { - w = TEMP_FAILURE_RETRY (write (outfd, &buf[t], r - t)); - if (w < 0) - { - fprintf (stderr, "write file to output fd `%s`: %m\n", file_to_read); - return w; - } - t += w; - } - } - return 0; -} - int -reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_read, int outputfd) +reexec_in_user_namespace (int ready, char *pause_pid_file_path) { cleanup_free char **argv = NULL; cleanup_free char *argv0 = NULL; @@ -1138,13 +1097,6 @@ reexec_in_user_namespace (int ready, char *pause_pid_file_path, char *file_to_re _exit (EXIT_FAILURE); } - if (file_to_read && file_to_read[0]) - { - ret = copy_file_to_fd (file_to_read, outputfd); - close (outputfd); - _exit (ret == 0 ? EXIT_SUCCESS : EXIT_FAILURE); - } - execvp ("/proc/self/exe", argv); fprintf (stderr, "failed to reexec: %m\n"); diff --git a/pkg/rootless/rootless_linux.go b/pkg/rootless/rootless_linux.go index 61133ababa..3ad3906241 100644 --- a/pkg/rootless/rootless_linux.go +++ b/pkg/rootless/rootless_linux.go @@ -32,7 +32,7 @@ import ( #include extern uid_t rootless_uid(); extern uid_t rootless_gid(); -extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path, char *file_to_read, int fd); +extern int reexec_in_user_namespace(int ready, char *pause_pid_file_path); extern int reexec_in_user_namespace_wait(int pid, int options); extern int reexec_userns_join(int pid, char *pause_pid_file_path); extern int is_fd_inherited(int fd); @@ -213,7 +213,7 @@ func copyMappings(from, to string) error { return os.WriteFile(to, content, 0600) } -func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ bool, _ int, retErr error) { +func becomeRootInUserNS(pausePid string) (_ bool, _ int, retErr error) { hasCapSysAdmin, err := unshare.HasCapSysAdmin() if err != nil { return false, 0, err @@ -249,13 +249,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo cPausePid := C.CString(pausePid) defer C.free(unsafe.Pointer(cPausePid)) - cFileToRead := C.CString(fileToRead) - defer C.free(unsafe.Pointer(cFileToRead)) - var fileOutputFD C.int - if fileOutput != nil { - fileOutputFD = C.int(fileOutput.Fd()) - } - runtime.LockOSThread() defer runtime.UnlockOSThread() @@ -287,7 +280,7 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo } }() - pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid, cFileToRead, fileOutputFD) + pidC := C.reexec_in_user_namespace(C.int(r.Fd()), cPausePid) pid = int(pidC) if pid < 0 { return false, -1, fmt.Errorf("cannot re-exec process") @@ -361,14 +354,6 @@ func becomeRootInUserNS(pausePid, fileToRead string, fileOutput *os.File) (_ boo return false, -1, fmt.Errorf("read from sync pipe: %w", err) } - if fileOutput != nil { - ret := C.reexec_in_user_namespace_wait(pidC, 0) - if ret < 0 { - return false, -1, errors.New("waiting for the re-exec process") - } - return true, 0, nil - } - if b[0] == '2' { // We have lost the race for writing the PID file, as probably another // process created a namespace and wrote the PID. @@ -434,69 +419,27 @@ func waitAndProxySignalsToChild(pid C.int) (bool, int, error) { // If podman was re-executed the caller needs to propagate the error code returned by the child // process. func BecomeRootInUserNS(pausePid string) (bool, int, error) { - return becomeRootInUserNS(pausePid, "", nil) + return becomeRootInUserNS(pausePid) } // TryJoinFromFilePaths attempts to join the namespaces of the pid files in paths. // This is useful when there are already running containers and we // don't have a pause process yet. We can use the paths to the conmon // processes to attempt joining their namespaces. -// If needNewNamespace is set, the file is read from a temporary user -// namespace, this is useful for containers that are running with a -// different uidmap and the unprivileged user has no way to read the -// file owned by the root in the container. -func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { +func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) { var lastErr error - var pausePid int for _, path := range paths { - if !needNewNamespace { - data, err := os.ReadFile(path) - if err != nil { - lastErr = err - continue - } - - pausePid, err = strconv.Atoi(string(data)) - if err != nil { - lastErr = fmt.Errorf("cannot parse file %q: %w", path, err) - continue - } - } else { - r, w, err := os.Pipe() - if err != nil { - lastErr = err - continue - } - - defer errorhandling.CloseQuiet(r) - - if _, _, err := becomeRootInUserNS("", path, w); err != nil { - w.Close() - lastErr = err - continue - } - - if err := w.Close(); err != nil { - return false, 0, err - } - defer func() { - C.reexec_in_user_namespace_wait(-1, 0) - }() - - b := make([]byte, 32) - - n, err := r.Read(b) - if err != nil { - lastErr = fmt.Errorf("cannot read %q: %w", path, err) - continue - } + data, err := os.ReadFile(path) + if err != nil { + lastErr = err + continue + } - pausePid, err = strconv.Atoi(string(b[:n])) - if err != nil { - lastErr = err - continue - } + pausePid, err := strconv.Atoi(string(data)) + if err != nil { + lastErr = fmt.Errorf("cannot parse file %q: %w", path, err) + continue } if pausePid > 0 && unix.Kill(pausePid, 0) == nil { diff --git a/pkg/rootless/rootless_unsupported.go b/pkg/rootless/rootless_unsupported.go index 0d587644fd..73282fb2a6 100644 --- a/pkg/rootless/rootless_unsupported.go +++ b/pkg/rootless/rootless_unsupported.go @@ -41,11 +41,7 @@ func GetRootlessGID() int { // This is useful when there are already running containers and we // don't have a pause process yet. We can use the paths to the conmon // processes to attempt joining their namespaces. -// If needNewNamespace is set, the file is read from a temporary user -// namespace, this is useful for containers that are running with a -// different uidmap and the unprivileged user has no way to read the -// file owned by the root in the container. -func TryJoinFromFilePaths(pausePidPath string, needNewNamespace bool, paths []string) (bool, int, error) { +func TryJoinFromFilePaths(pausePidPath string, paths []string) (bool, int, error) { return false, -1, errors.New("this function is not supported on this os") } diff --git a/test/system/550-pause-process.bats b/test/system/550-pause-process.bats index eee770f27a..cd38609306 100644 --- a/test/system/550-pause-process.bats +++ b/test/system/550-pause-process.bats @@ -119,7 +119,7 @@ function _check_pause_process() { # First let's run a container in the background to keep the userns active local cname1=c1_$(random_string) - run_podman run -d --name $cname1 $IMAGE top + run_podman run -d --name $cname1 --uidmap 0:100:100 $IMAGE top run_podman unshare readlink /proc/self/ns/user userns="$output" @@ -136,6 +136,9 @@ function _check_pause_process() { _test_sigproxy $cname2 $kidpid + # check pause process again + _check_pause_process + # our container exits 0 so podman should too wait $kidpid || die "podman run exited $? instead of zero"