From 597bbfa7c6820383ba011f0698a934575bbfef9b Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Wed, 30 Mar 2022 15:38:38 +1100 Subject: [PATCH 1/2] Relax /sys/dev/block restrictions for volumes and devices User space programs want to access information about the block devices they are operating on. E.g. the block size is an important aspect if doing O_DIRECT filesystem calls. On the other hand, rhbz#1772993 wants to keep the host information as hidden from the container running processes as possible. We expose only the volumes and devices that are mounted into the container by re-generating the symlinks in /sys/dev/block for the block devices that have host based symlinks. These are generated on ctr.state.RunDir/sysdevblock as a mountpoint and mounted ro into the container. The default visibility can changed by the user with --security-opt={u,}mask=/sys/dev/block Consolidate the libpod.mountBind implementation. Closes #12746 Signed-off-by: Daniel Black --- libpod/container_config.go | 2 + libpod/container_internal_linux.go | 93 +++++++++++++++--------- libpod/options.go | 12 +++ libpod/util_linux.go | 24 ++++++ pkg/specgen/generate/config_linux.go | 1 - pkg/specgen/generate/container_create.go | 4 + pkg/specgen/generate/oci.go | 2 + test/system/400-unprivileged-access.bats | 22 +++++- 8 files changed, 122 insertions(+), 38 deletions(-) diff --git a/libpod/container_config.go b/libpod/container_config.go index 544c45a8cc..d4c044a477 100644 --- a/libpod/container_config.go +++ b/libpod/container_config.go @@ -174,6 +174,8 @@ type ContainerRootFSConfig struct { // treated as root directories. Standard bind mounts will be mounted // into paths relative to these directories. ChrootDirs []string `json:"chroot_directories,omitempty"` + // Expose /sys/dev/block entries for mounts and devices in the container. + SysDevBlock bool `json:"sysdevblock,omitempty"` } // ContainerSecurityConfig is an embedded sub-config providing security configuration diff --git a/libpod/container_internal_linux.go b/libpod/container_internal_linux.go index a131ab3674..306c07e9b7 100644 --- a/libpod/container_internal_linux.go +++ b/libpod/container_internal_linux.go @@ -415,6 +415,29 @@ func getOverlayUpperAndWorkDir(options []string) (string, string, error) { return upperDir, workDir, nil } +// Add bind mounts to container +func (c *Container) mountBind(g generate.Generator, printWarning bool) { + for dstPath, srcPath := range c.state.BindMounts { + newMount := spec.Mount{ + Type: "bind", + Source: srcPath, + Destination: dstPath, + Options: []string{"bind", "rprivate"}, + } + if dstPath == sysDevBlock || (c.IsReadOnly() && dstPath != "/dev/shm") { + newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") + } + if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir { + newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev") + } + if !MountExists(g.Mounts(), dstPath) { + g.AddMount(newMount) + } else if printWarning { + logrus.Infof("User mount overriding libpod mount at %q", dstPath) + } + } +} + // Generate spec for a container // Accepts a map of the container's dependencies func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { @@ -580,25 +603,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) { g.SetLinuxMountLabel(c.MountLabel()) // Add bind mounts to container - for dstPath, srcPath := range c.state.BindMounts { - newMount := spec.Mount{ - Type: "bind", - Source: srcPath, - Destination: dstPath, - Options: []string{"bind", "rprivate"}, - } - if c.IsReadOnly() && dstPath != "/dev/shm" { - newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") - } - if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir { - newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev") - } - if !MountExists(g.Mounts(), dstPath) { - g.AddMount(newMount) - } else { - logrus.Infof("User mount overriding libpod mount at %q", dstPath) - } - } + c.mountBind(g, true) // Add overlay volumes for _, overlayVol := range c.config.OverlayVolumes { @@ -1848,23 +1853,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti } if options.TargetFile != "" || options.CheckpointImageID != "" { - for dstPath, srcPath := range c.state.BindMounts { - newMount := spec.Mount{ - Type: "bind", - Source: srcPath, - Destination: dstPath, - Options: []string{"bind", "private"}, - } - if c.IsReadOnly() && dstPath != "/dev/shm" { - newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev") - } - if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir { - newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev") - } - if !MountExists(g.Mounts(), dstPath) { - g.AddMount(newMount) - } - } + c.mountBind(g, false) } // Restore /dev/shm content @@ -2247,6 +2236,12 @@ func (c *Container) makeBindMounts() error { } } + if c.config.SysDevBlock { + if err := c.createSysDevBlock(); err != nil { + return fmt.Errorf("error creating /sys/dev/block structure for container %s: %w", c.ID(), err) + } + } + _, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"] if !hasRunContainerenv { // check in the spec mounts @@ -2536,6 +2531,32 @@ func (c *Container) createHosts() error { return c.bindMountRootFile(targetFile, config.DefaultHostsFile) } +func (c *Container) createSysDevBlock() error { + dir := fmt.Sprintf("%s/sysdevblock", c.state.RunDir) + c.state.BindMounts[sysDevBlock] = dir + if err := os.MkdirAll(dir, 0700); err != nil { + return err + } + for _, p := range c.config.Spec.Linux.Devices { + logrus.Debugf("Symlink for device %s", p.Path) + if err := createSysBlockSymlink(p.Path, dir); err != nil { + return err + } + } + for _, m := range c.config.Spec.Mounts { + logrus.Debugf("Symlink for mount location %s", m.Source) + if err := createSysBlockSymlink(m.Source, dir); err != nil { + return err + } + } + + if err := label.Relabel(dir, c.MountLabel(), false); err != nil { + return err + } + + return c.mountIntoRootDirs(sysDevBlock, dir) +} + // bindMountRootFile will chown and relabel the source file to make it usable in the container. // It will also add the path to the container bind mount map. // source is the path on the host, dest is the path in the container. diff --git a/libpod/options.go b/libpod/options.go index b31cb4ab24..ec28c792f5 100644 --- a/libpod/options.go +++ b/libpod/options.go @@ -303,6 +303,18 @@ func WithStorageOpts(storageOpts map[string]string) CtrCreateOption { } } +// WithSysDevBlock sets the options to create /sys/dev/block symlinks +// across reboots will be stored. +func WithSysDevBlock() CtrCreateOption { + return func(ctr *Container) error { + if ctr.valid { + return define.ErrCtrFinalized + } + ctr.config.SysDevBlock = true + return nil + } +} + // WithDefaultMountsFile sets the file to look at for default mounts (mainly // secrets). // Note we are not saving this in the database as it is for testing purposes diff --git a/libpod/util_linux.go b/libpod/util_linux.go index 7c79e6ce4e..0232716797 100644 --- a/libpod/util_linux.go +++ b/libpod/util_linux.go @@ -5,6 +5,7 @@ package libpod import ( "fmt" + "os" "strings" "syscall" @@ -17,6 +18,8 @@ import ( "golang.org/x/sys/unix" ) +const sysDevBlock = "/sys/dev/block" + // systemdSliceFromPath makes a new systemd slice under the given parent with // the given name. // The parent must be a slice. The name must NOT include ".slice" @@ -138,3 +141,24 @@ func Unmount(mount string) { } } } + +// Copies symlink from /sys/dev/block/DEVICE($path) to the $dir +func createSysBlockSymlink(path string, dir string) error { + statT := unix.Stat_t{} + if err := unix.Stat(path, &statT); err == nil { + major, minor := unix.Major(uint64(statT.Dev)), unix.Minor(uint64(statT.Dev)) + if statT.Mode&unix.S_IFBLK == unix.S_IFBLK { + // For block, copy what the major/minor the device is, not what fs it is placed on. + major, minor = unix.Major(uint64(statT.Rdev)), unix.Minor(uint64(statT.Rdev)) + } + target, errlink := os.Readlink(fmt.Sprintf(sysDevBlock+"/%d:%d", major, minor)) + if errlink == nil { + link := fmt.Sprintf("%s/%d:%d", dir, major, minor) + err := os.Symlink(target, link) + if errlink != nil { + return fmt.Errorf("error creating symlink target %s for %s: %w", target, link, err) + } + } + } + return nil +} diff --git a/pkg/specgen/generate/config_linux.go b/pkg/specgen/generate/config_linux.go index a469661611..17a5dee956 100644 --- a/pkg/specgen/generate/config_linux.go +++ b/pkg/specgen/generate/config_linux.go @@ -85,7 +85,6 @@ func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, mask, unmask "/proc/scsi", "/sys/firmware", "/sys/fs/selinux", - "/sys/dev/block", } if !privileged { diff --git a/pkg/specgen/generate/container_create.go b/pkg/specgen/generate/container_create.go index 8334d386f5..d440b6cbad 100644 --- a/pkg/specgen/generate/container_create.go +++ b/pkg/specgen/generate/container_create.go @@ -219,6 +219,10 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener } } } + // If its masked, BlockAccessToKernelFilesystems would have done it, if Unmasked, we don't need to do anything. + if !s.Privileged && shouldMask(sysDevBlock, s.Mask) && shouldMask(sysDevBlock, s.Unmask) { + options = append(options, libpod.WithSysDevBlock()) + } if len(s.HostDeviceList) > 0 { options = append(options, libpod.WithHostDevice(s.HostDeviceList)) } diff --git a/pkg/specgen/generate/oci.go b/pkg/specgen/generate/oci.go index f59fe10116..5dc29810c0 100644 --- a/pkg/specgen/generate/oci.go +++ b/pkg/specgen/generate/oci.go @@ -20,6 +20,8 @@ import ( "golang.org/x/sys/unix" ) +const sysDevBlock = "/sys/dev/block" + func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) { if s.ProcOpts == nil { return diff --git a/test/system/400-unprivileged-access.bats b/test/system/400-unprivileged-access.bats index 0d6be2d609..94757331ec 100644 --- a/test/system/400-unprivileged-access.bats +++ b/test/system/400-unprivileged-access.bats @@ -1,7 +1,8 @@ #!/usr/bin/env bats -*- bats -*- # # Tests #2730 - regular users are not able to read/write container storage -# Tests #6957 - /sys/dev (et al) are masked from unprivileged containers +# Tests #6957 - /sys/dev (et al) are masked (excluding volumes/devices #12746) +# from unprivileged containers # load helpers @@ -170,4 +171,23 @@ EOF done } +@test "explict /sys/dev/block mount is empty" { + run_podman '?' run --security-opt=mask=/sys/dev/block --rm $IMAGE sh -c 'ls /sys/dev/block/*' + assert $status -ne 2 " exit status: expected !=2 indicating no files in /sys/dev/block" +} + +@test "populate /sys/dev/block with volume major:minor link" { + # tmpfs doesn't have a /sys/dev/block link look for something else + dir=$(mktemp -d "$PODMAN_TMPDIR"/podmantestXXXXXXXXXX) + path=/sys/dev/block/$(stat -c '%Hd:%Ld' "${dir}") + if [ ! -h "$path" ]; then + rmdir "$dir" + skip "No $path link to copy" + fi + run_podman '?' run --volume "$dir":/myvol --rm "$IMAGE" readlink "$path" + rmdir "$dir" + if [ -z $result ]; then + die "Missing symlink for $path" + fi +} # vim: filetype=sh From f8f8276b48466dfa7c639dfa862c4dc91442bafa Mon Sep 17 00:00:00 2001 From: Daniel Black Date: Tue, 9 Aug 2022 18:10:27 +1000 Subject: [PATCH 2/2] golangci-lint for non-mips Signed-off-by: Daniel Black --- libpod/util_linux.go | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libpod/util_linux.go b/libpod/util_linux.go index 0232716797..9dd5059d2d 100644 --- a/libpod/util_linux.go +++ b/libpod/util_linux.go @@ -146,10 +146,10 @@ func Unmount(mount string) { func createSysBlockSymlink(path string, dir string) error { statT := unix.Stat_t{} if err := unix.Stat(path, &statT); err == nil { - major, minor := unix.Major(uint64(statT.Dev)), unix.Minor(uint64(statT.Dev)) + major, minor := unix.Major(statT.Dev), unix.Minor(statT.Dev) if statT.Mode&unix.S_IFBLK == unix.S_IFBLK { // For block, copy what the major/minor the device is, not what fs it is placed on. - major, minor = unix.Major(uint64(statT.Rdev)), unix.Minor(uint64(statT.Rdev)) + major, minor = unix.Major(statT.Rdev), unix.Minor(statT.Rdev) } target, errlink := os.Readlink(fmt.Sprintf(sysDevBlock+"/%d:%d", major, minor)) if errlink == nil {