From 4adca286fefbe60882dae861570ee9987f7a2e4f Mon Sep 17 00:00:00 2001 From: Nalin Dahyabhai Date: Tue, 10 Dec 2024 14:58:58 -0500 Subject: [PATCH] chroot: on Linux, try to pivot_root before falling back to chroot Unless --no-pivot or the equivalent API flag is set, try to pivot_root() to enter the rootfs during Run(). Fall back to using chroot() as before if that fails for any reason. Signed-off-by: Nalin Dahyabhai --- Makefile | 2 +- chroot/run_common.go | 14 +++++++---- chroot/run_freebsd.go | 1 + chroot/run_linux.go | 54 +++++++++++++++++++++++++++++++++++++++++-- chroot/run_test.go | 14 ++++++++--- run_freebsd.go | 2 +- run_linux.go | 2 +- 7 files changed, 77 insertions(+), 12 deletions(-) diff --git a/Makefile b/Makefile index 3d7048ace27..1032ef9a70e 100644 --- a/Makefile +++ b/Makefile @@ -99,7 +99,7 @@ FREEBSD_CROSS_TARGETS := $(filter bin/buildah.freebsd.%,$(ALL_CROSS_TARGETS)) .PHONY: cross cross: $(LINUX_CROSS_TARGETS) $(DARWIN_CROSS_TARGETS) $(WINDOWS_CROSS_TARGETS) $(FREEBSD_CROSS_TARGETS) -bin/buildah.%: +bin/buildah.%: $(SOURCES) mkdir -p ./bin GOOS=$(word 2,$(subst ., ,$@)) GOARCH=$(word 3,$(subst ., ,$@)) $(GO_BUILD) $(BUILDAH_LDFLAGS) -o $@ -tags "containers_image_openpgp" ./cmd/buildah diff --git a/chroot/run_common.go b/chroot/run_common.go index be2a571ceb4..895b4065bac 100644 --- a/chroot/run_common.go +++ b/chroot/run_common.go @@ -48,12 +48,13 @@ func init() { type runUsingChrootExecSubprocOptions struct { Spec *specs.Spec BundlePath string + NoPivot bool } // RunUsingChroot runs a chrooted process, using some of the settings from the // passed-in spec, and using the specified bundlePath to hold temporary files, // directories, and mountpoints. -func RunUsingChroot(spec *specs.Spec, bundlePath, homeDir string, stdin io.Reader, stdout, stderr io.Writer) (err error) { +func RunUsingChroot(spec *specs.Spec, bundlePath, homeDir string, stdin io.Reader, stdout, stderr io.Writer, noPivot bool) (err error) { var confwg sync.WaitGroup var homeFound bool for _, env := range spec.Process.Env { @@ -97,6 +98,7 @@ func RunUsingChroot(spec *specs.Spec, bundlePath, homeDir string, stdin io.Reade config, conferr := json.Marshal(runUsingChrootSubprocOptions{ Spec: spec, BundlePath: bundlePath, + NoPivot: noPivot, }) if conferr != nil { return fmt.Errorf("encoding configuration for %q: %w", runUsingChrootCommand, conferr) @@ -196,6 +198,7 @@ func runUsingChrootMain() { fmt.Fprintf(os.Stderr, "invalid options spec in runUsingChrootMain\n") os.Exit(1) } + noPivot := options.NoPivot // Prepare to shuttle stdio back and forth. rootUID32, rootGID32, err := util.GetHostRootIDs(options.Spec) @@ -442,7 +445,7 @@ func runUsingChrootMain() { }() // Set up mounts and namespaces, and run the parent subprocess. - status, err := runUsingChroot(options.Spec, options.BundlePath, ctty, stdin, stdout, stderr, closeOnceRunning) + status, err := runUsingChroot(options.Spec, options.BundlePath, ctty, stdin, stdout, stderr, noPivot, closeOnceRunning) if err != nil { fmt.Fprintf(os.Stderr, "error running subprocess: %v\n", err) os.Exit(1) @@ -463,7 +466,7 @@ func runUsingChrootMain() { // runUsingChroot, still in the grandparent process, sets up various bind // mounts and then runs the parent process in its own user namespace with the // necessary ID mappings. -func runUsingChroot(spec *specs.Spec, bundlePath string, ctty *os.File, stdin io.Reader, stdout, stderr io.Writer, closeOnceRunning []*os.File) (wstatus unix.WaitStatus, err error) { +func runUsingChroot(spec *specs.Spec, bundlePath string, ctty *os.File, stdin io.Reader, stdout, stderr io.Writer, noPivot bool, closeOnceRunning []*os.File) (wstatus unix.WaitStatus, err error) { var confwg sync.WaitGroup // Create a new mount namespace for ourselves and bind mount everything to a new location. @@ -496,6 +499,7 @@ func runUsingChroot(spec *specs.Spec, bundlePath string, ctty *os.File, stdin io config, conferr := json.Marshal(runUsingChrootExecSubprocOptions{ Spec: spec, BundlePath: bundlePath, + NoPivot: noPivot, }) if conferr != nil { fmt.Fprintf(os.Stderr, "error re-encoding configuration for %q\n", runUsingChrootExecCommand) @@ -619,8 +623,10 @@ func runUsingChrootExecMain() { // Try to chroot into the root. Do this before we potentially // block the syscall via the seccomp profile. Allow the // platform to override this - on FreeBSD, we use a simple - // jail to set the hostname in the container + // jail to set the hostname in the container, and on Linux + // we attempt to pivot_root. if err := createPlatformContainer(options); err != nil { + logrus.Debugf("createPlatformContainer: %v", err) var oldst, newst unix.Stat_t if err := unix.Stat(options.Spec.Root.Path, &oldst); err != nil { fmt.Fprintf(os.Stderr, "error stat()ing intended root directory %q: %v\n", options.Spec.Root.Path, err) diff --git a/chroot/run_freebsd.go b/chroot/run_freebsd.go index e32a4c9a464..4f2c49bdab4 100644 --- a/chroot/run_freebsd.go +++ b/chroot/run_freebsd.go @@ -41,6 +41,7 @@ var ( type runUsingChrootSubprocOptions struct { Spec *specs.Spec BundlePath string + NoPivot bool } func setPlatformUnshareOptions(spec *specs.Spec, cmd *unshare.Cmd) error { diff --git a/chroot/run_linux.go b/chroot/run_linux.go index 6fe93e0929b..694010b98e7 100644 --- a/chroot/run_linux.go +++ b/chroot/run_linux.go @@ -47,6 +47,7 @@ var ( type runUsingChrootSubprocOptions struct { Spec *specs.Spec BundlePath string + NoPivot bool UIDMappings []syscall.SysProcIDMap GIDMappings []syscall.SysProcIDMap } @@ -224,8 +225,57 @@ func makeRlimit(limit specs.POSIXRlimit) unix.Rlimit { return unix.Rlimit{Cur: limit.Soft, Max: limit.Hard} } -func createPlatformContainer(_ runUsingChrootExecSubprocOptions) error { - return errors.New("unsupported createPlatformContainer") +func createPlatformContainer(options runUsingChrootExecSubprocOptions) error { + if options.NoPivot { + return errors.New("not using pivot_root()") + } + // borrowing a technique from runc, who credit the LXC maintainers for this + // open descriptors for the old and new root directories so that we can use fchdir() + oldRootFd, err := unix.Open("/", unix.O_DIRECTORY, 0) + if err != nil { + return fmt.Errorf("opening host root directory: %w", err) + } + defer func() { + if err := unix.Close(oldRootFd); err != nil { + logrus.Warnf("closing host root directory: %v", err) + } + }() + newRootFd, err := unix.Open(options.Spec.Root.Path, unix.O_DIRECTORY, 0) + if err != nil { + return fmt.Errorf("opening container root directory: %w", err) + } + defer func() { + if err := unix.Close(newRootFd); err != nil { + logrus.Warnf("closing container root directory: %v", err) + } + }() + // change to the new root directory + if err := unix.Fchdir(newRootFd); err != nil { + return fmt.Errorf("changing to container root directory: %w", err) + } + // this makes the current directory the root directory. not actually + // sure what happens to the other one + if err := unix.PivotRoot(".", "."); err != nil { + return fmt.Errorf("pivot_root: %w", err) + } + // go back and clean up the old one + if err := unix.Fchdir(oldRootFd); err != nil { + return fmt.Errorf("changing to host root directory: %w", err) + } + // make sure we only unmount things under this tree + if err := unix.Mount(".", ".", "bind", unix.MS_BIND|unix.MS_SLAVE|unix.MS_REC, ""); err != nil { + return fmt.Errorf("tweaking mount flags on host root directory before unmounting from mount namespace: %w", err) + } + // detach this (unnamed?) old directory + if err := unix.Unmount(".", unix.MNT_DETACH); err != nil { + return fmt.Errorf("unmounting host root directory in mount namespace: %w", err) + } + // go back to a named root directory + if err := unix.Fchdir(newRootFd); err != nil { + return fmt.Errorf("changing to container root directory at last: %w", err) + } + logrus.Debugf("pivot_root()ed into %q", options.Spec.Root.Path) + return nil } func mountFlagsForFSFlags(fsFlags uintptr) uintptr { diff --git a/chroot/run_test.go b/chroot/run_test.go index e9406705117..87f28eb6aa6 100644 --- a/chroot/run_test.go +++ b/chroot/run_test.go @@ -36,7 +36,7 @@ func TestMain(m *testing.M) { os.Exit(m.Run()) } -func testMinimal(t *testing.T, modify func(g *generate.Generator, rootDir, bundleDir string), verify func(t *testing.T, report *types.TestReport)) { +func testMinimalWithPivot(t *testing.T, noPivot bool, modify func(g *generate.Generator, rootDir, bundleDir string), verify func(t *testing.T, report *types.TestReport)) { t.Helper() g, err := generate.New("linux") if err != nil { @@ -100,8 +100,8 @@ func testMinimal(t *testing.T, modify func(g *generate.Generator, rootDir, bundl } output := new(bytes.Buffer) - if err := RunUsingChroot(g.Config, bundleDir, "/", new(bytes.Buffer), output, output); err != nil { - t.Fatalf("run: %v: %s", err, output.String()) + if err := RunUsingChroot(g.Config, bundleDir, "/", new(bytes.Buffer), output, output, noPivot); err != nil { + t.Fatalf("run(noPivot=false): %v: %s", err, output.String()) } var report types.TestReport @@ -114,6 +114,14 @@ func testMinimal(t *testing.T, modify func(g *generate.Generator, rootDir, bundl } } +func testMinimal(t *testing.T, modify func(g *generate.Generator, rootDir, bundleDir string), verify func(t *testing.T, report *types.TestReport)) { + for _, noPivot := range []bool{false, true} { + t.Run(fmt.Sprintf("noPivot=%v", noPivot), func(t *testing.T) { + testMinimalWithPivot(t, noPivot, modify, verify) + }) + } +} + func TestNoop(t *testing.T) { if unix.Getuid() != 0 { t.Skip("tests need to be run as root") diff --git a/run_freebsd.go b/run_freebsd.go index 79ea3a44d3d..7d03759ef3c 100644 --- a/run_freebsd.go +++ b/run_freebsd.go @@ -328,7 +328,7 @@ func (b *Builder) Run(command []string, options RunOptions) error { } err = b.runUsingRuntimeSubproc(isolation, options, configureNetwork, networkString, moreCreateArgs, spec, mountPoint, path, containerName, b.Container, hostsFile, resolvFile) case IsolationChroot: - err = chroot.RunUsingChroot(spec, path, homeDir, options.Stdin, options.Stdout, options.Stderr) + err = chroot.RunUsingChroot(spec, path, homeDir, options.Stdin, options.Stdout, options.Stderr, options.NoPivot) default: err = errors.New("don't know how to run this command") } diff --git a/run_linux.go b/run_linux.go index 71d92323e5c..0acee7ac84e 100644 --- a/run_linux.go +++ b/run_linux.go @@ -531,7 +531,7 @@ rootless=%d err = b.runUsingRuntimeSubproc(isolation, options, configureNetwork, networkString, moreCreateArgs, spec, mountPoint, path, define.Package+"-"+filepath.Base(path), b.Container, hostsFile, resolvFile) case IsolationChroot: - err = chroot.RunUsingChroot(spec, path, homeDir, options.Stdin, options.Stdout, options.Stderr) + err = chroot.RunUsingChroot(spec, path, homeDir, options.Stdin, options.Stdout, options.Stderr, options.NoPivot) case IsolationOCIRootless: moreCreateArgs := []string{"--no-new-keyring"} if options.NoPivot {