diff --git a/runner/util_linux.go b/runner/util_linux.go index 658671b3..c8bbf8b6 100644 --- a/runner/util_linux.go +++ b/runner/util_linux.go @@ -1,6 +1,8 @@ package runner import ( + "context" + "errors" "io" "os/exec" "syscall" @@ -12,20 +14,65 @@ import ( func (e *Engine) killCmd(cmd *exec.Cmd) (pid int, err error) { pid = cmd.Process.Pid + var killDelay time.Duration + + waitResult := make(chan error) + go func() { + defer close(waitResult) + _, _ = cmd.Process.Wait() + }() + if e.config.Build.SendInterrupt { - // Sending a signal to make it clear to the process that it is time to turn off + e.mainDebug("sending interrupt to process %d", pid) + // Sending a signal to make it clear to the process group that it is time to turn off if err = syscall.Kill(-pid, syscall.SIGINT); err != nil { return } - time.Sleep(e.config.killDelay()) + // the kill delay is 0 by default unless the user has configured send_interrupt=true + // in which case it is fetched from the kill_delay setting in the .air.toml + killDelay = e.config.killDelay() + e.mainDebug("setting a kill timer for %s", killDelay.String()) } - // https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly - err = syscall.Kill(-pid, syscall.SIGKILL) + // prepare a cancel context that can stop the killing if it is not needed + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + killResult := make(chan error) + // Spawn a goroutine that will kill the process after kill delay if we have not + // received a wait result before that. + go func() { + select { + case <-time.After(killDelay): + e.mainDebug("kill timer expired") + // https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly + killResult <- syscall.Kill(-pid, syscall.SIGKILL) + case <-ctx.Done(): + e.mainDebug("kill timer canceled") + return + } + }() + + results := make([]error, 0, 2) - // Wait releases any resources associated with the Process. - _, _ = cmd.Process.Wait() - return + for { + // collect the responses from the kill and wait goroutines + select { + case err = <-killResult: + results = append(results, err) + case err = <-waitResult: + results = append(results, err) + // if we have a kill delay, we ignore the kill result + if killDelay > 0 && len(results) == 1 { + results = append(results, nil) + } + } + + if len(results) == 2 { + err = errors.Join(results...) + return + } + } } func (e *Engine) startCmd(cmd string) (*exec.Cmd, io.ReadCloser, io.ReadCloser, error) { diff --git a/runner/util_test.go b/runner/util_test.go index 275d5b78..8fd0a4b8 100644 --- a/runner/util_test.go +++ b/runner/util_test.go @@ -1,7 +1,6 @@ package runner import ( - "errors" "fmt" "os" "os/exec" @@ -10,7 +9,6 @@ import ( "runtime" "strconv" "strings" - "syscall" "testing" "time" @@ -148,27 +146,6 @@ func TestAdaptToVariousPlatforms(t *testing.T) { } } -func Test_killCmd_no_process(t *testing.T) { - e := Engine{ - config: &Config{ - Build: cfgBuild{ - SendInterrupt: false, - }, - }, - } - _, err := e.killCmd(&exec.Cmd{ - Process: &os.Process{ - Pid: 9999, - }, - }) - if err == nil { - t.Errorf("expected error but got none") - } - if !errors.Is(err, syscall.ESRCH) { - t.Errorf("expected '%s' but got '%s'", syscall.ESRCH, errors.Unwrap(err)) - } -} - func Test_killCmd_SendInterrupt_false(t *testing.T) { _, b, _, _ := runtime.Caller(0) diff --git a/runner/util_unix.go b/runner/util_unix.go index afd47c89..6201871c 100644 --- a/runner/util_unix.go +++ b/runner/util_unix.go @@ -3,6 +3,8 @@ package runner import ( + "context" + "errors" "io" "os" "os/exec" @@ -13,18 +15,65 @@ import ( func (e *Engine) killCmd(cmd *exec.Cmd) (pid int, err error) { pid = cmd.Process.Pid + var killDelay time.Duration + + waitResult := make(chan error) + go func() { + defer close(waitResult) + _, _ = cmd.Process.Wait() + }() + if e.config.Build.SendInterrupt { - // Sending a signal to make it clear to the process that it is time to turn off + e.mainDebug("sending interrupt to process %d", pid) + // Sending a signal to make it clear to the process group that it is time to turn off if err = syscall.Kill(-pid, syscall.SIGINT); err != nil { return } - time.Sleep(e.config.killDelay()) + // the kill delay is 0 by default unless the user has configured send_interrupt=true + // in which case it is fetched from the kill_delay setting in the .air.toml + killDelay = e.config.killDelay() + e.mainDebug("setting a kill timer for %s", killDelay.String()) + } + + // prepare a cancel context that can stop the killing if it is not needed + ctx, cancel := context.WithCancel(context.Background()) + defer cancel() + + killResult := make(chan error) + // Spawn a goroutine that will kill the process after kill delay if we have not + // received a wait result before that. + go func() { + select { + case <-time.After(killDelay): + e.mainDebug("kill timer expired") + // https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly + killResult <- syscall.Kill(-pid, syscall.SIGKILL) + case <-ctx.Done(): + e.mainDebug("kill timer canceled") + return + } + }() + + results := make([]error, 0, 2) + + for { + // collect the responses from the kill and wait goroutines + select { + case err = <-killResult: + results = append(results, err) + case err = <-waitResult: + results = append(results, err) + // if we have a kill delay, we ignore the kill result + if killDelay > 0 && len(results) == 1 { + results = append(results, nil) + } + } + + if len(results) == 2 { + err = errors.Join(results...) + return + } } - // https://stackoverflow.com/questions/22470193/why-wont-go-kill-a-child-process-correctly - err = syscall.Kill(-pid, syscall.SIGKILL) - // Wait releases any resources associated with the Process. - _, _ = cmd.Process.Wait() - return pid, err } func (e *Engine) startCmd(cmd string) (*exec.Cmd, io.ReadCloser, io.ReadCloser, error) {