Skip to content

Commit

Permalink
Relax /sys/dev/block restrictions for volumes and devices
Browse files Browse the repository at this point in the history
User space programs want to access information about the block
devices they are operating on. E.g. the block size is an important
aspect if doing O_DIRECT filesystem calls.

On the other hand, rhbz#1772993 wants to keep the host information
as hidden from the container running processes as possible.

We expose only the volumes and devices that are mounted into the
container by re-generating the symlinks in /sys/dev/block for the
block devices that have host based symlinks. These are generated on
ctr.state.RunDir/sysdevblock as a mountpoint and mounted ro into the
container.

The default visibility can be changed by the user with
--security-opt={u,}mask=/sys/dev/block

Consolidate the libpod.mountBind implementation.

Closes #12746

Signed-off-by: Daniel Black <[email protected]>
  • Loading branch information
grooverdan committed Jul 15, 2022
1 parent 255740b commit 738a98f
Show file tree
Hide file tree
Showing 6 changed files with 132 additions and 38 deletions.
108 changes: 72 additions & 36 deletions libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,29 @@ func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
return upperDir, workDir, nil
}

// mountBind turns every entry of c.state.BindMounts (container destination ->
// host source) into a "bind"/"rprivate" mount on the generator g.
//
// The /sys/dev/block mount is always read-only, as is every mount other than
// /dev/shm when the container is read-only; those mounts also get nosuid,
// noexec and nodev. A /dev/shm mount backed by the container's own ShmDir
// gets nosuid, noexec and nodev but stays writable.
//
// A destination that already has a mount in g is left untouched; when
// printWarning is true an info message is logged for it.
func (c *Container) mountBind(g generate.Generator, printWarning bool) {
	const shmDest = "/dev/shm"

	for dest, source := range c.state.BindMounts {
		opts := []string{"bind", "rprivate"}
		isShm := dest == shmDest

		if dest == sysDevBlock || (c.IsReadOnly() && !isShm) {
			opts = append(opts, "ro", "nosuid", "noexec", "nodev")
		}
		// source is the map value for dest, so for /dev/shm it is the
		// /dev/shm bind mount source.
		if isShm && source == c.config.ShmDir {
			opts = append(opts, "nosuid", "noexec", "nodev")
		}

		if MountExists(g.Mounts(), dest) {
			if printWarning {
				logrus.Infof("User mount overriding libpod mount at %q", dest)
			}
			continue
		}

		g.AddMount(spec.Mount{
			Type:        "bind",
			Source:      source,
			Destination: dest,
			Options:     opts,
		})
	}
}

// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
Expand Down Expand Up @@ -580,25 +603,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
g.SetLinuxMountLabel(c.MountLabel())

// Add bind mounts to container
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "rprivate"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
} else {
logrus.Infof("User mount overriding libpod mount at %q", dstPath)
}
}
c.mountBind(g, true)

// Add overlay volumes
for _, overlayVol := range c.config.OverlayVolumes {
Expand Down Expand Up @@ -1848,23 +1853,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}

if options.TargetFile != "" || options.CheckpointImageID != "" {
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
}
}
c.mountBind(g, false)
}

// Restore /dev/shm content
Expand Down Expand Up @@ -2247,6 +2236,12 @@ func (c *Container) makeBindMounts() error {
}
}

if MountExists(c.config.Spec.Mounts, sysDevBlock) {
if err := c.createSysDevBlock(); err != nil {
return fmt.Errorf("error creating /sys/dev/block structure for container %s: %w", c.ID(), err)
}
}

_, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
if !hasRunContainerenv {
// check in the spec mounts
Expand Down Expand Up @@ -2536,6 +2531,47 @@ func (c *Container) createHosts() error {
return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
}

// createSysDevBlock builds a private replacement for /sys/dev/block under the
// container's run directory and registers it as a bind mount.
//
// Placeholder mounts for /sys/dev/block (type "bind" with an empty Source)
// are removed from the spec. If the user supplied their own mount for
// /sys/dev/block it is left in place and nothing else is done.
//
// Otherwise the directory <RunDir>/sysdevblock is created and populated with
// MAJOR:MINOR symlinks for every device in the spec and for the source of
// every mount in the spec, relabelled with the container's mount label, and
// handed to mountIntoRootDirs.
func (c *Container) createSysDevBlock() error {
	// Filter the mount list in place. This keeps the relative order of the
	// remaining mounts and never indexes past the end of the slice, unlike
	// removing elements by swapping while ranging over the same slice.
	userMount := false
	kept := c.config.Spec.Mounts[:0]
	for _, m := range c.config.Spec.Mounts {
		switch {
		case m.Destination != sysDevBlock:
			kept = append(kept, m)
		case m.Type == "bind" && m.Source == "":
			// Placeholder with no source: drop it, the real source is
			// provided through c.state.BindMounts below.
		default:
			// This is a user mount to /sys/dev/block. Let them try.
			userMount = true
			kept = append(kept, m)
		}
	}
	c.config.Spec.Mounts = kept
	if userMount {
		return nil
	}

	dir := fmt.Sprintf("%s/sysdevblock", c.state.RunDir)
	c.state.BindMounts[sysDevBlock] = dir
	if err := os.MkdirAll(dir, 0700); err != nil {
		return err
	}

	// Guard against a spec without a Linux section instead of panicking.
	if c.config.Spec.Linux != nil {
		for _, p := range c.config.Spec.Linux.Devices {
			logrus.Debugf("Symlink for device %s", p.Path)
			if err := createSysBlockSymlink(p.Path, dir); err != nil {
				return err
			}
		}
	}
	for _, m := range c.config.Spec.Mounts {
		logrus.Debugf("Symlink for mount location %s", m.Source)
		if err := createSysBlockSymlink(m.Source, dir); err != nil {
			return err
		}
	}

	if err := label.Relabel(dir, c.MountLabel(), false); err != nil {
		return err
	}

	return c.mountIntoRootDirs(sysDevBlock, dir)
}

// bindMountRootFile will chown and relabel the source file to make it usable in the container.
// It will also add the path to the container bind mount map.
// source is the path on the host, dest is the path in the container.
Expand Down
1 change: 1 addition & 0 deletions libpod/runtime_ctr.go
Original file line number Diff line number Diff line change
Expand Up @@ -547,6 +547,7 @@ func (r *Runtime) setupContainer(ctx context.Context, ctr *Container) (_ *Contai
toLock.lock.Lock()
defer toLock.lock.Unlock()
}

// Add the container to the state
// TODO: May be worth looking into recovering from name/ID collisions here
if ctr.config.Pod != "" {
Expand Down
24 changes: 24 additions & 0 deletions libpod/util_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package libpod

import (
"fmt"
"os"
"strings"
"syscall"

Expand All @@ -17,6 +18,8 @@ import (
"golang.org/x/sys/unix"
)

// sysDevBlock is the sysfs directory whose MAJOR:MINOR entries are read by
// createSysBlockSymlink.
const sysDevBlock = "/sys/dev/block"

// systemdSliceFromPath makes a new systemd slice under the given parent with
// the given name.
// The parent must be a slice. The name must NOT include ".slice"
Expand Down Expand Up @@ -138,3 +141,24 @@ func Unmount(mount string) {
}
}
}

// createSysBlockSymlink copies the host symlink /sys/dev/block/MAJOR:MINOR for
// the device behind path into dir, under the same MAJOR:MINOR name.
//
// When path is a block device node, the device it refers to is used;
// otherwise the device of the filesystem that path is placed on is used.
//
// This is best effort: a path that cannot be stat'ed, or a device with no
// entry under /sys/dev/block on the host, is skipped and nil is returned. A
// link that already exists in dir is not an error either, since several paths
// can resolve to the same device. Any other failure to create the link is
// returned.
func createSysBlockSymlink(path string, dir string) error {
	statT := unix.Stat_t{}
	if err := unix.Stat(path, &statT); err != nil {
		// Not every source is a path that can be stat'ed; nothing to expose.
		return nil
	}

	major, minor := unix.Major(statT.Dev), unix.Minor(statT.Dev)
	// Compare the file-type bits as a whole rather than testing individual
	// bits of S_IFBLK.
	if statT.Mode&unix.S_IFMT == unix.S_IFBLK {
		// For block, copy what the major/minor the device is, not what fs it is placed on.
		major, minor = unix.Major(statT.Rdev), unix.Minor(statT.Rdev)
	}

	target, err := os.Readlink(fmt.Sprintf(sysDevBlock+"/%d:%d", major, minor))
	if err != nil {
		// No host link for this device, so there is nothing to copy.
		return nil
	}

	link := fmt.Sprintf("%s/%d:%d", dir, major, minor)
	if err := os.Symlink(target, link); err != nil && !os.IsExist(err) {
		return fmt.Errorf("error creating symlink target %s for %s: %w", target, link, err)
	}
	return nil
}
1 change: 0 additions & 1 deletion pkg/specgen/generate/config_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, mask, unmask
"/proc/scsi",
"/sys/firmware",
"/sys/fs/selinux",
"/sys/dev/block",
}

if !privileged {
Expand Down
14 changes: 14 additions & 0 deletions pkg/specgen/generate/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"golang.org/x/sys/unix"
)

// sysDevBlock is the container path that SpecGenToOCI checks against the
// mask/unmask lists and uses as the destination of its placeholder bind mount.
const sysDevBlock = "/sys/dev/block"

func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) {
if s.ProcOpts == nil {
return
Expand Down Expand Up @@ -355,6 +357,18 @@ func SpecGenToOCI(ctx context.Context, s *specgen.SpecGenerator, rt *libpod.Runt

BlockAccessToKernelFilesystems(s.Privileged, s.PidNS.IsHost(), s.Mask, s.Unmask, &g)

// If it's masked, BlockAccessToKernelFilesystems will already have handled it; if it's unmasked, we don't need to do anything.
if !s.Privileged && shouldMask(sysDevBlock, s.Mask) && shouldMask(sysDevBlock, s.Unmask) {
g.RemoveMount(sysDevBlock)
blkMnt := spec.Mount{
Destination: sysDevBlock,
Type: "bind",
Source: "", // filled out by setupContainer
Options: []string{"rprivate", "nosuid", "noexec", "nodev", "ro"},
}
g.AddMount(blkMnt)
}

g.ClearProcessEnv()
for name, val := range s.Env {
g.AddProcessEnv(name, val)
Expand Down
22 changes: 21 additions & 1 deletion test/system/400-unprivileged-access.bats
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env bats -*- bats -*-
#
# Tests #2730 - regular users are not able to read/write container storage
# Tests #6957 - /sys/dev (et al) are masked from unprivileged containers
# Tests #6957 - /sys/dev (et al) are masked (excluding volumes/devices #12746)
# from unprivileged containers
#

load helpers
Expand Down Expand Up @@ -169,4 +170,23 @@ EOF
done
}

# Runs a container with /sys/dev/block explicitly masked and lists the
# directory's contents from inside it.
# NOTE(review): '?' presumably tells run_podman to accept any exit status - confirm against the helpers.
# NOTE(review): the assertion only rejects exit status 2; an exit status of 0
# (files present) would also pass - confirm this is the intended check.
@test "explict /sys/dev/block mount is empty" {
run_podman '?' run --security-opt=mask=/sys/dev/block --rm $IMAGE sh -c 'ls /sys/dev/block/*'
assert $status -ne 2 " exit status: expected !=2 indicating no files in /sys/dev/block"
}

# Mounts a host directory as a volume and checks that the MAJOR:MINOR symlink
# of the device backing that directory is readable under /sys/dev/block inside
# the container. Skipped when the host has no such symlink for that device.
@test "populate /sys/dev/block with volume major:minor link" {
    # tmpfs doesn't have a /sys/dev/block link; if the temporary directory
    # lives on such a filesystem there is nothing to copy, so skip.
    dir=$(mktemp -d "$PODMAN_TMPDIR"/podmantestXXXXXXXXXX)
    path=/sys/dev/block/$(stat -c '%Hd:%Ld' "${dir}")
    if [ ! -h "$path" ]; then
        rmdir "$dir"
        skip "No $path link to copy"
    fi
    run_podman '?' run --volume "$dir":/myvol --rm "$IMAGE" readlink "$path"
    rmdir "$dir"
    # readlink prints the link target; empty output means the link is missing.
    # The captured output is in $output (quoted so an empty value is still
    # tested as an empty string rather than vanishing from the test).
    if [ -z "$output" ]; then
        die "Missing symlink for $path"
    fi
}
# vim: filetype=sh

0 comments on commit 738a98f

Please sign in to comment.