From 900e29549afaaec32dbb807a30352632eeb0bd52 Mon Sep 17 00:00:00 2001 From: Giuseppe Scrivano Date: Thu, 30 May 2024 10:44:18 +0200 Subject: [PATCH] libpod: do not move podman with --cgroups=disabled The expectation with --cgroups=disabled is that the current cgroup is used by the container. Currently the --cgroups=disabled is passed directly to the OCI runtime, but it doesn't stop Podman from creating a new cgroup when it doesn't own the current one. Closes: https://github.com/containers/podman/issues/20910 Signed-off-by: Giuseppe Scrivano --- cmd/podman/common/completion.go | 7 ++++- cmd/podman/root.go | 6 +++- pkg/domain/entities/engine_container.go | 2 +- pkg/domain/infra/abi/system_freebsd.go | 2 +- pkg/domain/infra/abi/system_linux.go | 39 +++++++++++++------------ pkg/domain/infra/tunnel/system.go | 2 +- test/system/420-cgroups.bats | 15 ++++++++++ 7 files changed, 50 insertions(+), 23 deletions(-) diff --git a/cmd/podman/common/completion.go b/cmd/podman/common/completion.go index b5c6c017d6..10d433cf33 100644 --- a/cmd/podman/common/completion.go +++ b/cmd/podman/common/completion.go @@ -58,8 +58,13 @@ func setupContainerEngine(cmd *cobra.Command) (entities.ContainerEngine, error) } if !registry.IsRemote() { _, noMoveProcess := cmd.Annotations[registry.NoMoveProcess] + cgroupMode := "" - err := containerEngine.SetupRootless(registry.Context(), noMoveProcess) + if flag := cmd.LocalFlags().Lookup("cgroups"); flag != nil { + cgroupMode = flag.Value.String() + } + + err := containerEngine.SetupRootless(registry.Context(), noMoveProcess, cgroupMode) if err != nil { return nil, err } diff --git a/cmd/podman/root.go b/cmd/podman/root.go index aaa992b676..ccd767caf5 100644 --- a/cmd/podman/root.go +++ b/cmd/podman/root.go @@ -362,8 +362,12 @@ func persistentPreRunE(cmd *cobra.Command, args []string) error { // 3) command doesn't require Parent Namespace _, found := cmd.Annotations[registry.ParentNSRequired] if !registry.IsRemote() && !found { + cgroupMode := "" _, noMoveProcess := cmd.Annotations[registry.NoMoveProcess] - err := registry.ContainerEngine().SetupRootless(registry.Context(), noMoveProcess) + if flag := cmd.LocalFlags().Lookup("cgroups"); flag != nil { + cgroupMode = flag.Value.String() + } + err := registry.ContainerEngine().SetupRootless(registry.Context(), noMoveProcess, cgroupMode) if err != nil { return err } diff --git a/pkg/domain/entities/engine_container.go b/pkg/domain/entities/engine_container.go index 15cf309bf2..712fdd7cdd 100644 --- a/pkg/domain/entities/engine_container.go +++ b/pkg/domain/entities/engine_container.go @@ -95,7 +95,7 @@ type ContainerEngine interface { //nolint:interfacebloat PodUnpause(ctx context.Context, namesOrIds []string, options PodunpauseOptions) ([]*PodUnpauseReport, error) Renumber(ctx context.Context) error Reset(ctx context.Context) error - SetupRootless(ctx context.Context, noMoveProcess bool) error + SetupRootless(ctx context.Context, noMoveProcess bool, cgroupMode string) error SecretCreate(ctx context.Context, name string, reader io.Reader, options SecretCreateOptions) (*SecretCreateReport, error) SecretInspect(ctx context.Context, nameOrIDs []string, options SecretInspectOptions) ([]*SecretInfoReport, []error, error) SecretList(ctx context.Context, opts SecretListRequest) ([]*SecretInfoReport, error) diff --git a/pkg/domain/infra/abi/system_freebsd.go b/pkg/domain/infra/abi/system_freebsd.go index c6ec91943e..1521a7e1a8 100644 --- a/pkg/domain/infra/abi/system_freebsd.go +++ b/pkg/domain/infra/abi/system_freebsd.go @@ -8,6 +8,6 @@ import ( const defaultRunPath = "/var/run" // SetupRootless in a NOP for freebsd as it only configures the rootless userns on linux. -func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error { +func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, cgroupMode string) error { return nil } diff --git a/pkg/domain/infra/abi/system_linux.go b/pkg/domain/infra/abi/system_linux.go index abe00d89af..fe36ad88a7 100644 --- a/pkg/domain/infra/abi/system_linux.go +++ b/pkg/domain/infra/abi/system_linux.go @@ -17,7 +17,7 @@ import ( // Default path for system runtime state const defaultRunPath = "/run" -func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error { +func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, cgroupMode string) error { runsUnderSystemd := systemd.RunsOnSystemd() if !runsUnderSystemd { isPid1 := os.Getpid() == 1 @@ -30,30 +30,33 @@ func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) } } - // do it only after podman has already re-execed and running with uid==0. - hasCapSysAdmin, err := unshare.HasCapSysAdmin() - if err != nil { - return err - } - // check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set. - if os.Geteuid() == 0 && hasCapSysAdmin { - ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() + configureCgroup := cgroupMode != "disabled" + if configureCgroup { + // do it only after podman has already re-execed and running with uid==0. + hasCapSysAdmin, err := unshare.HasCapSysAdmin() if err != nil { - logrus.Infof("Failed to detect the owner for the current cgroup: %v", err) + return err } - if !ownsCgroup { - conf, err := ic.Config(context.Background()) + // check for both euid == 0 and CAP_SYS_ADMIN because we may be running in a container with CAP_SYS_ADMIN set. + if os.Geteuid() == 0 && hasCapSysAdmin { + ownsCgroup, err := cgroups.UserOwnsCurrentSystemdCgroup() if err != nil { - return err + logrus.Infof("Failed to detect the owner for the current cgroup: %v", err) } - unitName := fmt.Sprintf("podman-%d.scope", os.Getpid()) - if runsUnderSystemd || conf.Engine.CgroupManager == config.SystemdCgroupsManager { - if err := systemd.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil { - logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err) + if !ownsCgroup { + conf, err := ic.Config(context.Background()) + if err != nil { + return err + } + unitName := fmt.Sprintf("podman-%d.scope", os.Getpid()) + if runsUnderSystemd || conf.Engine.CgroupManager == config.SystemdCgroupsManager { + if err := systemd.RunUnderSystemdScope(os.Getpid(), "user.slice", unitName); err != nil { + logrus.Debugf("Failed to add podman to systemd sandbox cgroup: %v", err) + } } } + return nil } - return nil } pausePidPath, err := util.GetRootlessPauseProcessPidPath() diff --git a/pkg/domain/infra/tunnel/system.go b/pkg/domain/infra/tunnel/system.go index 492fd0a894..f091fc79cb 100644 --- a/pkg/domain/infra/tunnel/system.go +++ b/pkg/domain/infra/tunnel/system.go @@ -13,7 +13,7 @@ func (ic *ContainerEngine) Info(ctx context.Context) (*define.Info, error) { return system.Info(ic.ClientCtx, nil) } -func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool) error { +func (ic *ContainerEngine) SetupRootless(_ context.Context, noMoveProcess bool, cgroupMode string) error { panic(errors.New("rootless engine mode is not supported when tunneling")) } diff --git a/test/system/420-cgroups.bats b/test/system/420-cgroups.bats index 3269f666cb..928278ac26 100644 --- a/test/system/420-cgroups.bats +++ b/test/system/420-cgroups.bats @@ -37,4 +37,19 @@ load helpers run_podman rm myc } +@test "podman run --cgroups=disabled keeps the current cgroup" { + skip_if_remote "podman-remote does not support --cgroups=disabled" + skip_if_rootless_cgroupsv1 + runtime=$(podman_runtime) + if [[ $runtime != "crun" ]]; then + skip "runtime is $runtime; --cgroups=disabled requires crun" + fi + + current_cgroup=$(cat /proc/self/cgroup) + + # --cgroupns=host is required to have full visibility of the cgroup path inside the container + run_podman run --cgroups=disabled --cgroupns=host --rm $IMAGE cat /proc/self/cgroup + is "$output" $current_cgroup "--cgroups=disabled must not change the current cgroup" +} + # vim: filetype=sh