Skip to content

Commit

Permalink
Relax /sys/dev/block restrictions for volumes and devices
Browse files Browse the repository at this point in the history
User space programs want to access information about the block
devices they are operating on. E.g. the block size is an important
aspect if doing O_DIRECT filesystem calls.

On the other hand, rhbz#1772993 wants to keep the host information
as hidden from the container running processes as possible.

We expose only the volumes and devices that are mounted into the
container by re-generating the symlinks in /sys/dev/block for the
block devices that have host based symlinks. These are generated on
ctr.state.RunDir/sysdevblock as a mountpoint and mounted ro into the
container.

The default visibility can changed by the user with
--security-opt={u,}mask=/sys/dev/block

Consolidate the libpod.mountBind implementation.

Closes #12746

Signed-off-by: Daniel Black <[email protected]>
  • Loading branch information
grooverdan committed Aug 9, 2022
1 parent 7992d86 commit 597bbfa
Show file tree
Hide file tree
Showing 8 changed files with 122 additions and 38 deletions.
2 changes: 2 additions & 0 deletions libpod/container_config.go
Original file line number Diff line number Diff line change
Expand Up @@ -174,6 +174,8 @@ type ContainerRootFSConfig struct {
// treated as root directories. Standard bind mounts will be mounted
// into paths relative to these directories.
ChrootDirs []string `json:"chroot_directories,omitempty"`
// Expose /sys/dev/block entries for mounts and devices in the container.
SysDevBlock bool `json:"sysdevblock,omitempty"`
}

// ContainerSecurityConfig is an embedded sub-config providing security configuration
Expand Down
93 changes: 57 additions & 36 deletions libpod/container_internal_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -415,6 +415,29 @@ func getOverlayUpperAndWorkDir(options []string) (string, string, error) {
return upperDir, workDir, nil
}

// Add bind mounts to container
func (c *Container) mountBind(g generate.Generator, printWarning bool) {
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "rprivate"},
}
if dstPath == sysDevBlock || (c.IsReadOnly() && dstPath != "/dev/shm") {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
} else if printWarning {
logrus.Infof("User mount overriding libpod mount at %q", dstPath)
}
}
}

// Generate spec for a container
// Accepts a map of the container's dependencies
func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
Expand Down Expand Up @@ -580,25 +603,7 @@ func (c *Container) generateSpec(ctx context.Context) (*spec.Spec, error) {
g.SetLinuxMountLabel(c.MountLabel())

// Add bind mounts to container
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "rprivate"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
} else {
logrus.Infof("User mount overriding libpod mount at %q", dstPath)
}
}
c.mountBind(g, true)

// Add overlay volumes
for _, overlayVol := range c.config.OverlayVolumes {
Expand Down Expand Up @@ -1848,23 +1853,7 @@ func (c *Container) restore(ctx context.Context, options ContainerCheckpointOpti
}

if options.TargetFile != "" || options.CheckpointImageID != "" {
for dstPath, srcPath := range c.state.BindMounts {
newMount := spec.Mount{
Type: "bind",
Source: srcPath,
Destination: dstPath,
Options: []string{"bind", "private"},
}
if c.IsReadOnly() && dstPath != "/dev/shm" {
newMount.Options = append(newMount.Options, "ro", "nosuid", "noexec", "nodev")
}
if dstPath == "/dev/shm" && c.state.BindMounts["/dev/shm"] == c.config.ShmDir {
newMount.Options = append(newMount.Options, "nosuid", "noexec", "nodev")
}
if !MountExists(g.Mounts(), dstPath) {
g.AddMount(newMount)
}
}
c.mountBind(g, false)
}

// Restore /dev/shm content
Expand Down Expand Up @@ -2247,6 +2236,12 @@ func (c *Container) makeBindMounts() error {
}
}

if c.config.SysDevBlock {
if err := c.createSysDevBlock(); err != nil {
return fmt.Errorf("error creating /sys/dev/block structure for container %s: %w", c.ID(), err)
}
}

_, hasRunContainerenv := c.state.BindMounts["/run/.containerenv"]
if !hasRunContainerenv {
// check in the spec mounts
Expand Down Expand Up @@ -2536,6 +2531,32 @@ func (c *Container) createHosts() error {
return c.bindMountRootFile(targetFile, config.DefaultHostsFile)
}

func (c *Container) createSysDevBlock() error {
dir := fmt.Sprintf("%s/sysdevblock", c.state.RunDir)
c.state.BindMounts[sysDevBlock] = dir
if err := os.MkdirAll(dir, 0700); err != nil {
return err
}
for _, p := range c.config.Spec.Linux.Devices {
logrus.Debugf("Symlink for device %s", p.Path)
if err := createSysBlockSymlink(p.Path, dir); err != nil {
return err
}
}
for _, m := range c.config.Spec.Mounts {
logrus.Debugf("Symlink for mount location %s", m.Source)
if err := createSysBlockSymlink(m.Source, dir); err != nil {
return err
}
}

if err := label.Relabel(dir, c.MountLabel(), false); err != nil {
return err
}

return c.mountIntoRootDirs(sysDevBlock, dir)
}

// bindMountRootFile will chown and relabel the source file to make it usable in the container.
// It will also add the path to the container bind mount map.
// source is the path on the host, dest is the path in the container.
Expand Down
12 changes: 12 additions & 0 deletions libpod/options.go
Original file line number Diff line number Diff line change
Expand Up @@ -303,6 +303,18 @@ func WithStorageOpts(storageOpts map[string]string) CtrCreateOption {
}
}

// WithSysDevBlock sets the options to create /sys/dev/block symlinks
// across reboots will be stored.
func WithSysDevBlock() CtrCreateOption {
return func(ctr *Container) error {
if ctr.valid {
return define.ErrCtrFinalized
}
ctr.config.SysDevBlock = true
return nil
}
}

// WithDefaultMountsFile sets the file to look at for default mounts (mainly
// secrets).
// Note we are not saving this in the database as it is for testing purposes
Expand Down
24 changes: 24 additions & 0 deletions libpod/util_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ package libpod

import (
"fmt"
"os"
"strings"
"syscall"

Expand All @@ -17,6 +18,8 @@ import (
"golang.org/x/sys/unix"
)

const sysDevBlock = "/sys/dev/block"

// systemdSliceFromPath makes a new systemd slice under the given parent with
// the given name.
// The parent must be a slice. The name must NOT include ".slice"
Expand Down Expand Up @@ -138,3 +141,24 @@ func Unmount(mount string) {
}
}
}

// Copies symlink from /sys/dev/block/DEVICE($path) to the $dir
func createSysBlockSymlink(path string, dir string) error {
statT := unix.Stat_t{}
if err := unix.Stat(path, &statT); err == nil {
major, minor := unix.Major(uint64(statT.Dev)), unix.Minor(uint64(statT.Dev))
if statT.Mode&unix.S_IFBLK == unix.S_IFBLK {
// For block, copy what the major/minor the device is, not what fs it is placed on.
major, minor = unix.Major(uint64(statT.Rdev)), unix.Minor(uint64(statT.Rdev))
}
target, errlink := os.Readlink(fmt.Sprintf(sysDevBlock+"/%d:%d", major, minor))
if errlink == nil {
link := fmt.Sprintf("%s/%d:%d", dir, major, minor)
err := os.Symlink(target, link)
if errlink != nil {
return fmt.Errorf("error creating symlink target %s for %s: %w", target, link, err)
}
}
}
return nil
}
1 change: 0 additions & 1 deletion pkg/specgen/generate/config_linux.go
Original file line number Diff line number Diff line change
Expand Up @@ -85,7 +85,6 @@ func BlockAccessToKernelFilesystems(privileged, pidModeIsHost bool, mask, unmask
"/proc/scsi",
"/sys/firmware",
"/sys/fs/selinux",
"/sys/dev/block",
}

if !privileged {
Expand Down
4 changes: 4 additions & 0 deletions pkg/specgen/generate/container_create.go
Original file line number Diff line number Diff line change
Expand Up @@ -219,6 +219,10 @@ func MakeContainer(ctx context.Context, rt *libpod.Runtime, s *specgen.SpecGener
}
}
}
// If its masked, BlockAccessToKernelFilesystems would have done it, if Unmasked, we don't need to do anything.
if !s.Privileged && shouldMask(sysDevBlock, s.Mask) && shouldMask(sysDevBlock, s.Unmask) {
options = append(options, libpod.WithSysDevBlock())
}
if len(s.HostDeviceList) > 0 {
options = append(options, libpod.WithHostDevice(s.HostDeviceList))
}
Expand Down
2 changes: 2 additions & 0 deletions pkg/specgen/generate/oci.go
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,8 @@ import (
"golang.org/x/sys/unix"
)

const sysDevBlock = "/sys/dev/block"

func setProcOpts(s *specgen.SpecGenerator, g *generate.Generator) {
if s.ProcOpts == nil {
return
Expand Down
22 changes: 21 additions & 1 deletion test/system/400-unprivileged-access.bats
Original file line number Diff line number Diff line change
@@ -1,7 +1,8 @@
#!/usr/bin/env bats -*- bats -*-
#
# Tests #2730 - regular users are not able to read/write container storage
# Tests #6957 - /sys/dev (et al) are masked from unprivileged containers
# Tests #6957 - /sys/dev (et al) are masked (excluding volumes/devices #12746)
# from unprivileged containers
#

load helpers
Expand Down Expand Up @@ -170,4 +171,23 @@ EOF
done
}

@test "explict /sys/dev/block mount is empty" {
run_podman '?' run --security-opt=mask=/sys/dev/block --rm $IMAGE sh -c 'ls /sys/dev/block/*'
assert $status -ne 2 " exit status: expected !=2 indicating no files in /sys/dev/block"
}

@test "populate /sys/dev/block with volume major:minor link" {
# tmpfs doesn't have a /sys/dev/block link look for something else
dir=$(mktemp -d "$PODMAN_TMPDIR"/podmantestXXXXXXXXXX)
path=/sys/dev/block/$(stat -c '%Hd:%Ld' "${dir}")
if [ ! -h "$path" ]; then
rmdir "$dir"
skip "No $path link to copy"
fi
run_podman '?' run --volume "$dir":/myvol --rm "$IMAGE" readlink "$path"
rmdir "$dir"
if [ -z $result ]; then
die "Missing symlink for $path"
fi
}
# vim: filetype=sh

0 comments on commit 597bbfa

Please sign in to comment.