diff --git a/go.mod b/go.mod index 431fc05abb..8ac2e6db50 100644 --- a/go.mod +++ b/go.mod @@ -55,7 +55,7 @@ require ( github.com/onsi/gomega v1.34.1 github.com/opencontainers/go-digest v1.0.0 github.com/opencontainers/image-spec v1.1.0 - github.com/opencontainers/runc v1.1.13 + github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5 github.com/opencontainers/runtime-spec v1.2.0 github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc github.com/opencontainers/selinux v1.11.0 @@ -224,5 +224,3 @@ require ( gopkg.in/tomb.v1 v1.0.0-20141024135613-dd632973f1e7 // indirect tags.cncf.io/container-device-interface/specs-go v0.8.0 // indirect ) - -replace github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.1-0.20240131200429-02120488a4c0 diff --git a/go.sum b/go.sum index a34090a729..f95dd562e4 100644 --- a/go.sum +++ b/go.sum @@ -390,8 +390,8 @@ github.com/opencontainers/go-digest v1.0.0 h1:apOUWs51W5PlhuyGyz9FCeeBIOUDA/6nW8 github.com/opencontainers/go-digest v1.0.0/go.mod h1:0JzlMkj0TRzQZfJkVvzbP0HBR3IKzErnv2BNG4W4MAM= github.com/opencontainers/image-spec v1.1.0 h1:8SG7/vwALn54lVB/0yZ/MMwhFrPYtpEHQb2IpWsCzug= github.com/opencontainers/image-spec v1.1.0/go.mod h1:W4s4sFTMaBeK1BQLXbG4AdM2szdn85PY75RI83NrTrM= -github.com/opencontainers/runc v1.1.1-0.20240131200429-02120488a4c0 h1:NwSQ/5rex97Rum/xZOMjlDQbbZ8YJKOTihf9sxqHxtE= -github.com/opencontainers/runc v1.1.1-0.20240131200429-02120488a4c0/go.mod h1:tBsQqk9ETVlXxzXjk2Xh/1VjxC/U3Gaq5ps/rC/cadE= +github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5 h1:VqTLG6pS4DlCwEAiwoYoQ3kXnhYCEeHB85vsYeM5ico= +github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5/go.mod h1:H8njh/SD+WY9bYMmVsEEWDJgJdviOSDjNeXMjeNbYCE= github.com/opencontainers/runtime-spec v1.2.0 h1:z97+pHb3uELt/yiAWD691HNHQIF07bE7dzrbT927iTk= github.com/opencontainers/runtime-spec v1.2.0/go.mod h1:jwyrGlmzljRJv/Fgzds9SsS/C5hL+LL3ko9hs6T5lQ0= github.com/opencontainers/runtime-tools v0.9.1-0.20230914150019-408c51e934dc h1:d2hUh5O6MRBvStV55MQ8we08t42zSTqBbscoQccWmMc= diff --git a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go index 684248f255..4484cd2397 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/apparmor/apparmor_unsupported.go @@ -1,5 +1,4 @@ //go:build !linux -// +build !linux package apparmor diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go index b9ba889b7a..811f2d26e0 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/cgroups.go @@ -12,7 +12,8 @@ var ( ErrDevicesUnsupported = errors.New("cgroup manager is not configured to set device rules") // DevicesSetV1 and DevicesSetV2 are functions to set devices for - // cgroup v1 and v2, respectively. Unless libcontainer/cgroups/devices + // cgroup v1 and v2, respectively. Unless + // [github.com/opencontainers/runc/libcontainer/cgroups/devices] // package is imported, it is set to nil, so cgroup managers can't // manage devices. DevicesSetV1 func(path string, r *configs.Resources) error diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go index 16aae5a3b7..78c5bcf0d3 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/file.go @@ -57,6 +57,40 @@ func WriteFile(dir, file, data string) error { return nil } +// WriteFileByLine is the same as WriteFile, except if data contains newlines, +// it is written line by line. +func WriteFileByLine(dir, file, data string) error { + i := strings.Index(data, "\n") + if i == -1 { + return WriteFile(dir, file, data) + } + + fd, err := OpenFile(dir, file, unix.O_WRONLY) + if err != nil { + return err + } + defer fd.Close() + start := 0 + for { + var line string + if i == -1 { + line = data[start:] + } else { + line = data[start : start+i+1] + } + _, err := fd.WriteString(line) + if err != nil { + return fmt.Errorf("failed to write %q: %w", line, err) + } + if i == -1 { + break + } + start += i + 1 + i = strings.Index(data[start:], "\n") + } + return nil +} + const ( cgroupfsDir = "/sys/fs/cgroup" cgroupfsPrefix = cgroupfsDir + "/" diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go index 727f7f9184..62574b53c5 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs/cpu.go @@ -35,15 +35,31 @@ func (s *CpuGroup) Apply(path string, r *configs.Resources, pid int) error { } func (s *CpuGroup) SetRtSched(path string, r *configs.Resources) error { + var period string if r.CpuRtPeriod != 0 { - if err := cgroups.WriteFile(path, "cpu.rt_period_us", strconv.FormatUint(r.CpuRtPeriod, 10)); err != nil { - return err + period = strconv.FormatUint(r.CpuRtPeriod, 10) + if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil { + // The values of cpu.rt_period_us and cpu.rt_runtime_us + // are inter-dependent and need to be set in a proper order. + // If the kernel rejects the new period value with EINVAL + // and the new runtime value is also being set, let's + // ignore the error for now and retry later. + if !errors.Is(err, unix.EINVAL) || r.CpuRtRuntime == 0 { + return err + } + } else { + period = "" } } if r.CpuRtRuntime != 0 { if err := cgroups.WriteFile(path, "cpu.rt_runtime_us", strconv.FormatInt(r.CpuRtRuntime, 10)); err != nil { return err } + if period != "" { + if err := cgroups.WriteFile(path, "cpu.rt_period_us", period); err != nil { + return err + } + } } return nil } @@ -89,9 +105,11 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { if r.CpuBurst != nil { burst = strconv.FormatUint(*r.CpuBurst, 10) if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { - // this is a special trick for burst feature, the current systemd and low version of kernel will not support it. - // So, an `no such file or directory` error would be raised, and we can ignore it . - if !errors.Is(err, unix.ENOENT) { + if errors.Is(err, unix.ENOENT) { + // If CPU burst knob is not available (e.g. + // older kernel), ignore it. + burst = "" + } else { // Sometimes when the burst to be set is larger // than the current one, it is rejected by the kernel // (EINVAL) as old_quota/new_burst exceeds the parent @@ -117,9 +135,7 @@ func (s *CpuGroup) Set(path string, r *configs.Resources) error { } if burst != "" { if err := cgroups.WriteFile(path, "cpu.cfs_burst_us", burst); err != nil { - if !errors.Is(err, unix.ENOENT) { - return err - } + return err } } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go index 0760be74b9..b1be7df5cc 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/fs2.go @@ -233,7 +233,7 @@ func (m *Manager) setUnified(res map[string]string) error { if strings.Contains(k, "/") { return fmt.Errorf("unified resource %q must be a file name (no slashes)", k) } - if err := cgroups.WriteFile(m.dirPath, k, v); err != nil { + if err := cgroups.WriteFileByLine(m.dirPath, k, v); err != nil { // Check for both EPERM and ENOENT since O_CREAT is used by WriteFile. if errors.Is(err, os.ErrPermission) || errors.Is(err, os.ErrNotExist) { // Check if a controller is available, diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go index 2965659742..df8336ba0f 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/fs2/memory.go @@ -57,7 +57,10 @@ func setMemory(dirPath string, r *configs.Resources) error { // never write empty string to `memory.swap.max`, it means set to 0. if swapStr != "" { if err := cgroups.WriteFile(dirPath, "memory.swap.max", swapStr); err != nil { - return err + // If swap is not enabled, silently ignore setting to max or disabling it. + if !(errors.Is(err, os.ErrNotExist) && (swapStr == "max" || swapStr == "0")) { + return err + } } } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go index 186cbc6413..d303cf204c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/cgroups/utils.go @@ -136,18 +136,18 @@ func GetAllSubsystems() ([]string, error) { return subsystems, nil } -func readProcsFile(dir string) ([]int, error) { - f, err := OpenFile(dir, CgroupProcesses, os.O_RDONLY) +func readProcsFile(dir string) (out []int, _ error) { + file := CgroupProcesses + retry := true + +again: + f, err := OpenFile(dir, file, os.O_RDONLY) if err != nil { return nil, err } defer f.Close() - var ( - s = bufio.NewScanner(f) - out = []int{} - ) - + s := bufio.NewScanner(f) for s.Scan() { if t := s.Text(); t != "" { pid, err := strconv.Atoi(t) @@ -157,6 +157,13 @@ func readProcsFile(dir string) ([]int, error) { out = append(out, pid) } } + if errors.Is(s.Err(), unix.ENOTSUP) && retry { + // For a threaded cgroup, read returns ENOTSUP, and we should + // read from cgroup.threads instead. + file = "cgroup.threads" + retry = false + goto again + } return out, s.Err() } @@ -275,9 +282,7 @@ func RemovePaths(paths map[string]string) (err error) { } } if len(paths) == 0 { - //nolint:ineffassign,staticcheck // done to help garbage collecting: opencontainers/runc#2506 - // TODO: switch to clear once Go < 1.21 is not supported. - paths = make(map[string]string) + clear(paths) return nil } return fmt.Errorf("Failed to remove paths: %v", paths) diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go index 7e383020f4..53f5ec5a0d 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/cgroup_unsupported.go @@ -1,5 +1,4 @@ //go:build !linux -// +build !linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go index a0a79d19d5..22fe0f9b4c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/config.go @@ -222,6 +222,9 @@ type Config struct { // Personality contains configuration for the Linux personality syscall. Personality *LinuxPersonality `json:"personality,omitempty"` + + // IOPriority is the container's I/O priority. + IOPriority *IOPriority `json:"io_priority,omitempty"` } // Scheduler is based on the Linux sched_setattr(2) syscall. @@ -283,6 +286,14 @@ func ToSchedAttr(scheduler *Scheduler) (*unix.SchedAttr, error) { }, nil } +var IOPrioClassMapping = map[specs.IOPriorityClass]int{ + specs.IOPRIO_CLASS_RT: 1, + specs.IOPRIO_CLASS_BE: 2, + specs.IOPRIO_CLASS_IDLE: 3, +} + +type IOPriority = specs.LinuxIOPriority + type ( HookName string HookList []Hook diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go index bce829e290..1fd87ce6a4 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/configs_fuzzer.go @@ -1,5 +1,4 @@ //go:build gofuzz -// +build gofuzz package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go index 2154191215..1d4d9fe52a 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/mount_unsupported.go @@ -1,5 +1,4 @@ //go:build !linux -// +build !linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go index 15d8046f3d..26b70b26fa 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall.go @@ -1,5 +1,4 @@ //go:build linux -// +build linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go index fbb0d49071..10bf243650 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_syscall_unsupported.go @@ -1,5 +1,4 @@ //go:build !linux && !windows -// +build !linux,!windows package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go index 946db30a54..914684993c 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/configs/namespaces_unsupported.go @@ -1,5 +1,4 @@ //go:build !linux -// +build !linux package configs diff --git a/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go index 7d8e9fc310..d00775f514 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/devices/device_unix.go @@ -1,5 +1,4 @@ //go:build !windows -// +build !windows package devices diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go index b225f18f2e..a07afe07bc 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns.go @@ -1,4 +1,8 @@ package userns -// RunningInUserNS detects whether we are currently running in a user namespace. -var RunningInUserNS = runningInUserNS +// RunningInUserNS detects whether we are currently running in a Linux +// user namespace and memoizes the result. It returns false on non-Linux +// platforms. +func RunningInUserNS() bool { + return inUserNS() +} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go index a6710b321b..a1462e13bc 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux.go @@ -7,32 +7,26 @@ import ( "sync" ) -var ( - inUserNS bool - nsOnce sync.Once -) +var inUserNS = sync.OnceValue(runningInUserNS) // runningInUserNS detects whether we are currently running in a user namespace. // // Originally copied from https://github.com/lxc/incus/blob/e45085dd42f826b3c8c3228e9733c0b6f998eafe/shared/util.go#L678-L700. func runningInUserNS() bool { - nsOnce.Do(func() { - file, err := os.Open("/proc/self/uid_map") - if err != nil { - // This kernel-provided file only exists if user namespaces are supported. - return - } - defer file.Close() - - buf := bufio.NewReader(file) - l, _, err := buf.ReadLine() - if err != nil { - return - } - - inUserNS = uidMapInUserNS(string(l)) - }) - return inUserNS + file, err := os.Open("/proc/self/uid_map") + if err != nil { + // This kernel-provided file only exists if user namespaces are supported. + return false + } + defer file.Close() + + buf := bufio.NewReader(file) + l, _, err := buf.ReadLine() + if err != nil { + return false + } + + return uidMapInUserNS(string(l)) } func uidMapInUserNS(uidMap string) bool { diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux_fuzzer.go similarity index 74% rename from vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go rename to vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux_fuzzer.go index bff03f8d85..26ba2e16ec 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_fuzzer.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_linux_fuzzer.go @@ -1,5 +1,4 @@ -//go:build gofuzz -// +build gofuzz +//go:build linux && gofuzz package userns diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps.c b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps.c deleted file mode 100644 index 84f2c6188c..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps.c +++ /dev/null @@ -1,79 +0,0 @@ -#define _GNU_SOURCE -#include -#include -#include -#include -#include -#include - -/* - * All of the code here is run inside an aync-signal-safe context, so we need - * to be careful to not call any functions that could cause issues. In theory, - * since we are a Go program, there are fewer restrictions in practice, it's - * better to be safe than sorry. - * - * The only exception is exit, which we need to call to make sure we don't - * return into runc. - */ - -void bail(int pipefd, const char *fmt, ...) -{ - va_list args; - - va_start(args, fmt); - vdprintf(pipefd, fmt, args); - va_end(args); - - exit(1); -} - -int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd) -{ - char buffer[4096] = { 0 }; - - pid_t child = fork(); - if (child != 0) - return child; - /* in child */ - - /* Join the target userns. */ - int nsfd = open(userns_path, O_RDONLY); - if (nsfd < 0) - bail(errfd, "open userns path %s failed: %m", userns_path); - - int err = setns(nsfd, CLONE_NEWUSER); - if (err < 0) - bail(errfd, "setns %s failed: %m", userns_path); - - close(nsfd); - - /* Pipe the requested file contents. */ - int fd = open(path, O_RDONLY); - if (fd < 0) - bail(errfd, "open %s in userns %s failed: %m", path, userns_path); - - int nread, ntotal = 0; - while ((nread = read(fd, buffer, sizeof(buffer))) != 0) { - if (nread < 0) - bail(errfd, "read bytes from %s failed (after %d total bytes read): %m", path, ntotal); - ntotal += nread; - - int nwritten = 0; - while (nwritten < nread) { - int n = write(outfd, buffer, nread - nwritten); - if (n < 0) - bail(errfd, "write %d bytes from %s failed (after %d bytes written): %m", - nread - nwritten, path, nwritten); - nwritten += n; - } - if (nread != nwritten) - bail(errfd, "mismatch for bytes read and written: %d read != %d written", nread, nwritten); - } - - close(fd); - close(outfd); - close(errfd); - - /* We must exit here, otherwise we would return into a forked runc. */ - exit(0); -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps_linux.go deleted file mode 100644 index 7a8c2b023b..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_maps_linux.go +++ /dev/null @@ -1,186 +0,0 @@ -//go:build linux - -package userns - -import ( - "bufio" - "bytes" - "fmt" - "io" - "os" - "unsafe" - - "github.com/opencontainers/runc/libcontainer/configs" - "github.com/sirupsen/logrus" -) - -/* -#include -extern int spawn_userns_cat(char *userns_path, char *path, int outfd, int errfd); -*/ -import "C" - -func parseIdmapData(data []byte) (ms []configs.IDMap, err error) { - scanner := bufio.NewScanner(bytes.NewReader(data)) - for scanner.Scan() { - var m configs.IDMap - line := scanner.Text() - if _, err := fmt.Sscanf(line, "%d %d %d", &m.ContainerID, &m.HostID, &m.Size); err != nil { - return nil, fmt.Errorf("parsing id map failed: invalid format in line %q: %w", line, err) - } - ms = append(ms, m) - } - if err := scanner.Err(); err != nil { - return nil, fmt.Errorf("parsing id map failed: %w", err) - } - return ms, nil -} - -// Do something equivalent to nsenter --user= cat , but more -// efficiently. Returns the contents of the requested file from within the user -// namespace. -func spawnUserNamespaceCat(nsPath string, path string) ([]byte, error) { - rdr, wtr, err := os.Pipe() - if err != nil { - return nil, fmt.Errorf("create pipe for userns spawn failed: %w", err) - } - defer rdr.Close() - defer wtr.Close() - - errRdr, errWtr, err := os.Pipe() - if err != nil { - return nil, fmt.Errorf("create error pipe for userns spawn failed: %w", err) - } - defer errRdr.Close() - defer errWtr.Close() - - cNsPath := C.CString(nsPath) - defer C.free(unsafe.Pointer(cNsPath)) - cPath := C.CString(path) - defer C.free(unsafe.Pointer(cPath)) - - childPid := C.spawn_userns_cat(cNsPath, cPath, C.int(wtr.Fd()), C.int(errWtr.Fd())) - - if childPid < 0 { - return nil, fmt.Errorf("failed to spawn fork for userns") - } else if childPid == 0 { - // this should never happen - panic("runc executing inside fork child -- unsafe state!") - } - - // We are in the parent -- close the write end of the pipe before reading. - wtr.Close() - output, err := io.ReadAll(rdr) - rdr.Close() - if err != nil { - return nil, fmt.Errorf("reading from userns spawn failed: %w", err) - } - - // Ditto for the error pipe. - errWtr.Close() - errOutput, err := io.ReadAll(errRdr) - errRdr.Close() - if err != nil { - return nil, fmt.Errorf("reading from userns spawn error pipe failed: %w", err) - } - errOutput = bytes.TrimSpace(errOutput) - - // Clean up the child. - child, err := os.FindProcess(int(childPid)) - if err != nil { - return nil, fmt.Errorf("could not find userns spawn process: %w", err) - } - state, err := child.Wait() - if err != nil { - return nil, fmt.Errorf("failed to wait for userns spawn process: %w", err) - } - if !state.Success() { - errStr := string(errOutput) - if errStr == "" { - errStr = fmt.Sprintf("unknown error (status code %d)", state.ExitCode()) - } - return nil, fmt.Errorf("userns spawn: %s", errStr) - } else if len(errOutput) > 0 { - // We can just ignore weird output in the error pipe if the process - // didn't bail(), but for completeness output for debugging. - logrus.Debugf("userns spawn succeeded but unexpected error message found: %s", string(errOutput)) - } - // The subprocess succeeded, return whatever it wrote to the pipe. - return output, nil -} - -func GetUserNamespaceMappings(nsPath string) (uidMap, gidMap []configs.IDMap, err error) { - var ( - pid int - extra rune - tryFastPath bool - ) - - // nsPath is usually of the form /proc//ns/user, which means that we - // already have a pid that is part of the user namespace and thus we can - // just use the pid to read from /proc//*id_map. - // - // Note that Sscanf doesn't consume the whole input, so we check for any - // trailing data with %c. That way, we can be sure the pattern matched - // /proc/$pid/ns/user _exactly_ iff n === 1. - if n, _ := fmt.Sscanf(nsPath, "/proc/%d/ns/user%c", &pid, &extra); n == 1 { - tryFastPath = pid > 0 - } - - for _, mapType := range []struct { - name string - idMap *[]configs.IDMap - }{ - {"uid_map", &uidMap}, - {"gid_map", &gidMap}, - } { - var mapData []byte - - if tryFastPath { - path := fmt.Sprintf("/proc/%d/%s", pid, mapType.name) - data, err := os.ReadFile(path) - if err != nil { - // Do not error out here -- we need to try the slow path if the - // fast path failed. - logrus.Debugf("failed to use fast path to read %s from userns %s (error: %s), falling back to slow userns-join path", mapType.name, nsPath, err) - } else { - mapData = data - } - } else { - logrus.Debugf("cannot use fast path to read %s from userns %s, falling back to slow userns-join path", mapType.name, nsPath) - } - - if mapData == nil { - // We have to actually join the namespace if we cannot take the - // fast path. The path is resolved with respect to the child - // process, so just use /proc/self. - data, err := spawnUserNamespaceCat(nsPath, "/proc/self/"+mapType.name) - if err != nil { - return nil, nil, err - } - mapData = data - } - idMap, err := parseIdmapData(mapData) - if err != nil { - return nil, nil, fmt.Errorf("failed to parse %s of userns %s: %w", mapType.name, nsPath, err) - } - *mapType.idMap = idMap - } - - return uidMap, gidMap, nil -} - -// IsSameMapping returns whether or not the two id mappings are the same. Note -// that if the order of the mappings is different, or a mapping has been split, -// the mappings will be considered different. -func IsSameMapping(a, b []configs.IDMap) bool { - if len(a) != len(b) { - return false - } - for idx := range a { - if a[idx] != b[idx] { - return false - } - } - return true -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go index 391c811c68..8ed83072c2 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/userns/userns_unsupported.go @@ -1,16 +1,6 @@ //go:build !linux -// +build !linux package userns -// runningInUserNS is a stub for non-Linux systems -// Always returns false -func runningInUserNS() bool { - return false -} - -// uidMapInUserNS is a stub for non-Linux systems -// Always returns false -func uidMapInUserNS(uidMap string) bool { - return false -} +// inUserNS is a stub for non-Linux systems. Always returns false. +func inUserNS() bool { return false } diff --git a/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go b/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go deleted file mode 100644 index 2eb64cf76c..0000000000 --- a/vendor/github.com/opencontainers/runc/libcontainer/userns/usernsfd_linux.go +++ /dev/null @@ -1,156 +0,0 @@ -package userns - -import ( - "fmt" - "os" - "sort" - "strings" - "sync" - "syscall" - - "github.com/sirupsen/logrus" - "golang.org/x/sys/unix" - - "github.com/opencontainers/runc/libcontainer/configs" -) - -type Mapping struct { - UIDMappings []configs.IDMap - GIDMappings []configs.IDMap -} - -func (m Mapping) toSys() (uids, gids []syscall.SysProcIDMap) { - for _, uid := range m.UIDMappings { - uids = append(uids, syscall.SysProcIDMap{ - ContainerID: int(uid.ContainerID), - HostID: int(uid.HostID), - Size: int(uid.Size), - }) - } - for _, gid := range m.GIDMappings { - gids = append(gids, syscall.SysProcIDMap{ - ContainerID: int(gid.ContainerID), - HostID: int(gid.HostID), - Size: int(gid.Size), - }) - } - return -} - -// id returns a unique identifier for this mapping, agnostic of the order of -// the uid and gid mappings (because the order doesn't matter to the kernel). -// The set of userns handles is indexed using this ID. -func (m Mapping) id() string { - var uids, gids []string - for _, idmap := range m.UIDMappings { - uids = append(uids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) - } - for _, idmap := range m.GIDMappings { - gids = append(gids, fmt.Sprintf("%d:%d:%d", idmap.ContainerID, idmap.HostID, idmap.Size)) - } - // We don't care about the sort order -- just sort them. - sort.Strings(uids) - sort.Strings(gids) - return "uid=" + strings.Join(uids, ",") + ";gid=" + strings.Join(gids, ",") -} - -type Handles struct { - m sync.Mutex - maps map[string]*os.File -} - -// Release all resources associated with this Handle. All existing files -// returned from Get() will continue to work even after calling Release(). The -// same Handles can be re-used after calling Release(). -func (hs *Handles) Release() { - hs.m.Lock() - defer hs.m.Unlock() - - // Close the files for good measure, though GC will do that for us anyway. - for _, file := range hs.maps { - _ = file.Close() - } - hs.maps = nil -} - -func spawnProc(req Mapping) (*os.Process, error) { - // We need to spawn a subprocess with the requested mappings, which is - // unfortunately quite expensive. The "safe" way of doing this is natively - // with Go (and then spawning something like "sleep infinity"), but - // execve() is a waste of cycles because we just need some process to have - // the right mapping, we don't care what it's executing. The "unsafe" - // option of doing a clone() behind the back of Go is probably okay in - // theory as long as we just do kill(getpid(), SIGSTOP). However, if we - // tell Go to put the new process into PTRACE_TRACEME mode, we can avoid - // the exec and not have to faff around with the mappings. - // - // Note that Go's stdlib does not support newuidmap, but in the case of - // id-mapped mounts, it seems incredibly unlikely that the user will be - // requesting us to do a remapping as an unprivileged user with mappings - // they have privileges over. - logrus.Debugf("spawning dummy process for id-mapping %s", req.id()) - uidMappings, gidMappings := req.toSys() - // We don't need to use /proc/thread-self here because the exe mm of a - // thread-group is guaranteed to be the same for all threads by definition. - // This lets us avoid having to do runtime.LockOSThread. - return os.StartProcess("/proc/self/exe", []string{"runc", "--help"}, &os.ProcAttr{ - Sys: &syscall.SysProcAttr{ - Cloneflags: unix.CLONE_NEWUSER, - UidMappings: uidMappings, - GidMappings: gidMappings, - GidMappingsEnableSetgroups: false, - // Put the process into PTRACE_TRACEME mode to allow us to get the - // userns without having a proper execve() target. - Ptrace: true, - }, - }) -} - -func dupFile(f *os.File) (*os.File, error) { - newFd, err := unix.FcntlInt(f.Fd(), unix.F_DUPFD_CLOEXEC, 0) - if err != nil { - return nil, os.NewSyscallError("fcntl(F_DUPFD_CLOEXEC)", err) - } - return os.NewFile(uintptr(newFd), f.Name()), nil -} - -// Get returns a handle to a /proc/$pid/ns/user nsfs file with the requested -// mapping. The processes spawned to produce userns nsfds are cached, so if -// equivalent user namespace mappings are requested, the same user namespace -// will be returned. The caller is responsible for closing the returned file -// descriptor. -func (hs *Handles) Get(req Mapping) (file *os.File, err error) { - hs.m.Lock() - defer hs.m.Unlock() - - if hs.maps == nil { - hs.maps = make(map[string]*os.File) - } - - file, ok := hs.maps[req.id()] - if !ok { - proc, err := spawnProc(req) - if err != nil { - return nil, fmt.Errorf("failed to spawn dummy process for map %s: %w", req.id(), err) - } - // Make sure we kill the helper process. We ignore errors because - // there's not much we can do about them anyway, and ultimately - defer func() { - _ = proc.Kill() - _, _ = proc.Wait() - }() - - // Stash away a handle to the userns file. This is neater than keeping - // the process alive, because Go's GC can handle files much better than - // leaked processes, and having long-living useless processes seems - // less than ideal. - file, err = os.Open(fmt.Sprintf("/proc/%d/ns/user", proc.Pid)) - if err != nil { - return nil, err - } - hs.maps[req.id()] = file - } - // Duplicate the file, to make sure the lifecycle of each *os.File we - // return is independent. - return dupFile(file) -} diff --git a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go index f57f0874a0..6bf9102f41 100644 --- a/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go +++ b/vendor/github.com/opencontainers/runc/libcontainer/utils/utils_unix.go @@ -1,5 +1,4 @@ //go:build !windows -// +build !windows package utils @@ -10,6 +9,7 @@ import ( "path/filepath" "runtime" "strconv" + "strings" "sync" _ "unsafe" // for go:linkname @@ -261,3 +261,17 @@ func ProcThreadSelf(subpath string) (string, ProcThreadSelfCloser) { func ProcThreadSelfFd(fd uintptr) (string, ProcThreadSelfCloser) { return ProcThreadSelf("fd/" + strconv.FormatUint(uint64(fd), 10)) } + +// IsLexicallyInRoot is shorthand for strings.HasPrefix(path+"/", root+"/"), +// but properly handling the case where path or root are "/". +// +// NOTE: The return value only make sense if the path doesn't contain "..". +func IsLexicallyInRoot(root, path string) bool { + if root != "/" { + root += "/" + } + if path != "/" { + path += "/" + } + return strings.HasPrefix(path, root) +} diff --git a/vendor/modules.txt b/vendor/modules.txt index 584eed8720..00c926f36a 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -883,8 +883,8 @@ github.com/opencontainers/go-digest ## explicit; go 1.18 github.com/opencontainers/image-spec/specs-go github.com/opencontainers/image-spec/specs-go/v1 -# github.com/opencontainers/runc v1.1.13 => github.com/opencontainers/runc v1.1.1-0.20240131200429-02120488a4c0 -## explicit; go 1.20 +# github.com/opencontainers/runc v1.2.0-rc.2.0.20240801140032-ad5b481dace5 +## explicit; go 1.21 github.com/opencontainers/runc/libcontainer/apparmor github.com/opencontainers/runc/libcontainer/cgroups github.com/opencontainers/runc/libcontainer/cgroups/fs @@ -1369,4 +1369,3 @@ tags.cncf.io/container-device-interface/pkg/parser # tags.cncf.io/container-device-interface/specs-go v0.8.0 ## explicit; go 1.19 tags.cncf.io/container-device-interface/specs-go -# github.com/opencontainers/runc => github.com/opencontainers/runc v1.1.1-0.20240131200429-02120488a4c0