Skip to content

Commit

Permalink
Merge pull request containerd#10683 from samuelkarp/shim-exec-fp-test…
Browse files Browse the repository at this point in the history
…-1.6

[release/1.6] integration: regression test for issue 10589
  • Loading branch information
samuelkarp authored Sep 6, 2024
2 parents 5f43e4d + ab9fedd commit 172b137
Show file tree
Hide file tree
Showing 5 changed files with 490 additions and 0 deletions.
210 changes: 210 additions & 0 deletions integration/client/container_linux_test.go
Original file line number Diff line number Diff line change
Expand Up @@ -35,14 +35,17 @@ import (
. "github.com/containerd/containerd"
"github.com/containerd/containerd/cio"
"github.com/containerd/containerd/containers"
"github.com/containerd/containerd/integration/failpoint"
"github.com/containerd/containerd/oci"
"github.com/containerd/containerd/pkg/fifosync"
"github.com/containerd/containerd/plugin"
"github.com/containerd/containerd/runtime/linux/runctypes"
"github.com/containerd/containerd/runtime/v2/runc/options"
"github.com/containerd/containerd/sys"
"github.com/containerd/errdefs"

"github.com/opencontainers/runtime-spec/specs-go"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
exec "golang.org/x/sys/execabs"
"golang.org/x/sys/unix"
Expand Down Expand Up @@ -1591,3 +1594,210 @@ func TestIssue9103(t *testing.T) {
})
}
}

// TestIssue10589 is used as regression case for issue 10589.
//
// This issue was caused by a race between init exits and new exec process tracking inside the shim. The test operates
// by controlling the time between when the shim invokes "runc exec" and when the actual "runc exec" is triggered. This
// allows validating that races for shim state tracking between pre- and post-start of the exec process do not exist.
//
// The workflow is as follows:
// 1. Create a container as normal
// 2. Make an exec1 using runc-fp with delayexec
// 3. Wait until the exec is waiting to start (triggered by delayexec)
// 4. Kill the container init process (signalling it is easiest)
// 5. Make an exec2 using runc-fp with delayexec
// 6. Wait until the exec is waiting to start
// 7. Allow exec1 to proceed
// 8. Allow exec2 to proceed
// 9. See that the container has exited and all execs have exited too
//
// https://github.com/containerd/containerd/issues/10589
func TestIssue10589(t *testing.T) {
if f := os.Getenv("RUNC_FLAVOR"); f != "" && f != "runc" {
t.Skip("test requires runc")
}
if rt := os.Getenv("TEST_RUNTIME"); rt != "" && rt != plugin.RuntimeRuncV2 {
t.Skip("test requires io.containerd.runc.v2")
}

client, err := newClient(t, address)
require.NoError(t, err)
t.Cleanup(func() {
client.Close()
})

var (
image Image
ctx, cancel = testContext(t)
id = t.Name()
)
t.Cleanup(cancel)

image, err = client.GetImage(ctx, testImage)
require.NoError(t, err)

// 1. Create a sleeping container
t.Log("1. Create a sleeping container")
container, err := client.NewContainer(ctx, id,
WithNewSnapshot(id, image),
WithNewSpec(oci.WithImageConfig(image),
withProcessArgs("sleep", "inf"),
oci.WithAnnotations(map[string]string{
"oci.runc.failpoint.profile": "delayExec",
}),
),
WithRuntime(client.Runtime(), &options.Options{
BinaryName: "runc-fp",
}),
)
require.NoError(t, err, "create container")
t.Cleanup(func() {
ctx, cancel := context.WithTimeout(ctx, 10*time.Second)
err := container.Delete(ctx, WithSnapshotCleanup)
if err != nil {
t.Log("delete err", err)
}
cancel()
})

task, err := container.NewTask(ctx, empty())
require.NoError(t, err, "create task")
t.Cleanup(func() {
ctx, cancel := context.WithTimeout(ctx, 2*time.Second)
st, err := task.Delete(ctx, WithProcessKill)
t.Log("exit status", st)
if err != nil {
t.Log("kill err", err)
}
cancel()
})

err = task.Start(ctx)
require.NoError(t, err, "start container")

status, err := task.Status(ctx)
require.NoError(t, err, "container status")
require.Equal(t, Running, status.Status)

// 2. Create an exec
t.Log("2. Create exec1")
exec1ReadyFifo, err := fifosync.NewWaiter(filepath.Join(t.TempDir(), "exec1-ready.fifo"), 0600)
require.NoError(t, err, "create exec1 ready fifo")
exec1DelayFifo, err := fifosync.NewTrigger(filepath.Join(t.TempDir(), "exec1-delay.fifo"), 0600)
require.NoError(t, err, "create exec1 delay fifo")
exec1, err := task.Exec(ctx, "exec1", &specs.Process{
Args: []string{"/bin/sleep", "301"},
Cwd: "/",
Env: []string{
failpoint.DelayExecReadyEnv + "=" + exec1ReadyFifo.Name(),
failpoint.DelayExecDelayEnv + "=" + exec1DelayFifo.Name(),
},
}, cio.NullIO)
require.NoError(t, err, "create exec1")

exec1done := make(chan struct{})
go func() {
defer close(exec1done)
t.Log("Starting exec1")
err := exec1.Start(ctx)
assert.Error(t, err, "start exec1")
t.Logf("error starting exec1: %s", err)
}()

// 3. Wait until the exec is waiting to start
t.Log("3. Wait until exec1 is waiting to start")
err = exec1ReadyFifo.Wait()
require.NoError(t, err, "open exec1 fifo")

// 4. Kill the container init process
t.Log("4. Kill the container init process")
target := task.Pid()
t.Logf("Killing main pid (%v) of container %s", target, container.ID())
syscall.Kill(int(target), syscall.SIGKILL)
status, err = task.Status(ctx)
require.NoError(t, err, "container status")
t.Log("container status", status.Status)

// 5. Make an exec (2) using this failpoint
t.Log("5. Create exec2")
exec2ReadyFifo, err := fifosync.NewWaiter(filepath.Join(t.TempDir(), "exec2-ready.fifo"), 0600)
require.NoError(t, err, "create exec2 ready fifo: %q", exec2ReadyFifo)
exec2DelayFifo, err := fifosync.NewTrigger(filepath.Join(t.TempDir(), "exec2-delay.fifo"), 0600)
require.NoError(t, err, "create exec2 delay fifo: %q", exec2DelayFifo)
exec2, err := task.Exec(ctx, "exec2", &specs.Process{
Args: []string{"/bin/sleep", "302"},
Cwd: "/",
Env: []string{
failpoint.DelayExecReadyEnv + "=" + exec2ReadyFifo.Name(),
failpoint.DelayExecDelayEnv + "=" + exec2DelayFifo.Name(),
},
}, cio.NullIO)
require.NoError(t, err, "create exec2")

exec2done := make(chan struct{})
didExec2Run := true
go func() {
defer close(exec2done)
t.Log("Starting exec2")
err := exec2.Start(ctx)
assert.Error(t, err, "start exec2")
t.Logf("error starting exec2: %s", err)
}()

// 6. Wait until the exec is waiting to start
t.Log("6. Wait until exec2 is waiting to start")
exec2ready := make(chan struct{})
go func() {
exec2ReadyFifo.Wait()
close(exec2ready)
}()
select {
case <-exec2ready:
case <-exec2done:
didExec2Run = false
}

// 7. Allow exec=1 to proceed
t.Log("7. Allow exec=1 to proceed")
err = exec1DelayFifo.Trigger()
assert.NoError(t, err, "trigger exec1 fifo")
status, err = task.Status(ctx)
require.NoError(t, err, "container status")
t.Log("container status", status.Status)
<-exec1done
status, err = task.Status(ctx)
require.NoError(t, err, "container status")
t.Log("container status", status.Status)

// 8. Allow exec=2 to proceed
if didExec2Run {
t.Log("8. Allow exec2 to proceed")
err = exec2DelayFifo.Trigger()
assert.NoError(t, err, "trigger exec2 fifo")
status, err = task.Status(ctx)
require.NoError(t, err, "container status")
t.Log("container status", status.Status)
<-exec2done
status, err = task.Status(ctx)
require.NoError(t, err, "container status")
t.Log("container status", status.Status)
} else {
t.Log("8. Skip exec2")
}

// 9. Validate
t.Log("9. Validate")
status, err = exec1.Status(ctx)
require.NoError(t, err, "exec1 status")
t.Logf("exec1 status: %s", status.Status)
assert.Equal(t, Created, status.Status)
status, err = exec2.Status(ctx)
require.NoError(t, err, "exec2 status")
t.Logf("exec2 status: %s", status.Status)
assert.Equal(t, Created, status.Status)
status, err = task.Status(ctx)
t.Logf("task status: %s", status.Status)
require.NoError(t, err, "container status")
assert.Equal(t, Stopped, status.Status)
}
131 changes: 131 additions & 0 deletions integration/failpoint/cmd/runc-fp/delayexec.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
//go:build linux

/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package main

import (
"context"
"encoding/json"
"errors"
"fmt"
"io"
"os"
"strings"

"github.com/opencontainers/runtime-spec/specs-go"
"github.com/sirupsen/logrus"

"github.com/containerd/containerd/integration/failpoint"
"github.com/containerd/containerd/pkg/fifosync"
)

// delayExec delays an "exec" command until a trigger is received from the calling test program. This can be used to
// test races around container lifecycle and exec processes.
func delayExec(ctx context.Context, method invoker) error {
isExec := strings.Contains(strings.Join(os.Args, ","), ",exec,")
if !isExec {
if err := method(ctx); err != nil {
return err
}
return nil
}
logrus.Debug("EXEC!")

if err := delay(); err != nil {
return err
}
if err := method(ctx); err != nil {
return err
}
return nil
}

func delay() error {
ready, delay, err := fifoFromProcessEnv()
if err != nil {
return err
}
if err := ready.Trigger(); err != nil {
return err
}
return delay.Wait()
}

// fifoFromProcessEnv finds a fifo specified in the environment variables of an exec process
func fifoFromProcessEnv() (fifosync.Trigger, fifosync.Waiter, error) {
env, err := processEnvironment()
if err != nil {
return nil, nil, err
}

readyName, ok := env[failpoint.DelayExecReadyEnv]
if !ok {
return nil, nil, fmt.Errorf("fifo: failed to find %q env var in %v", failpoint.DelayExecReadyEnv, env)
}
delayName, ok := env[failpoint.DelayExecDelayEnv]
if !ok {
return nil, nil, fmt.Errorf("fifo: failed to find %q env var in %v", failpoint.DelayExecDelayEnv, env)
}
logrus.WithField("ready", readyName).WithField("delay", delayName).Debug("Found FIFOs!")
readyFIFO, err := fifosync.NewTrigger(readyName, 0600)
if err != nil {
return nil, nil, err
}
delayFIFO, err := fifosync.NewWaiter(delayName, 0600)
if err != nil {
return nil, nil, err
}
return readyFIFO, delayFIFO, nil
}

func processEnvironment() (map[string]string, error) {
idx := 2
for ; idx < len(os.Args); idx++ {
if os.Args[idx] == "--process" {
break
}
}

if idx >= len(os.Args)-1 || os.Args[idx] != "--process" {
return nil, errors.New("env: option --process required")
}

specFile := os.Args[idx+1]
f, err := os.OpenFile(specFile, os.O_RDONLY, 0o644)
if err != nil {
return nil, fmt.Errorf("env: failed to open %s: %w", specFile, err)
}

b, err := io.ReadAll(f)
if err != nil {
return nil, fmt.Errorf("env: failed to read spec from %q", specFile)
}
var spec specs.Process
if err := json.Unmarshal(b, &spec); err != nil {
return nil, fmt.Errorf("env: failed to unmarshal spec from %q: %w", specFile, err)
}

// XXX: env vars can be specified multiple times, but we only keep one
env := make(map[string]string)
for _, e := range spec.Env {
k, v, _ := strings.Cut(e, "=")
env[k] = v
}

return env, nil
}
2 changes: 2 additions & 0 deletions integration/failpoint/cmd/runc-fp/main.go
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ import (
"syscall"

"github.com/containerd/containerd/oci"

"github.com/sirupsen/logrus"
)

Expand All @@ -40,6 +41,7 @@ type invokerInterceptor func(context.Context, invoker) error
var (
failpointProfiles = map[string]invokerInterceptor{
"issue9103": issue9103KillInitAfterCreate,
"delayExec": delayExec,
}
)

Expand Down
22 changes: 22 additions & 0 deletions integration/failpoint/const.go
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
/*
Copyright The containerd Authors.
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.
*/

package failpoint

const (
DelayExecReadyEnv = "_RUNC_FP_DELAY_EXEC_READY"
DelayExecDelayEnv = "_RUNC_FP_DELAY_EXEC_DELAY"
)
Loading

0 comments on commit 172b137

Please sign in to comment.