Skip to content

Commit

Permalink
[nextest-runner] ensure that the second signal always results in SIGKILL
Browse files Browse the repository at this point in the history
It was always the intention to do this, but this broke at some point :( Unbreak
it and add tests.
  • Loading branch information
sunshowers committed Dec 11, 2024
1 parent 09c81f5 commit c6d034b
Show file tree
Hide file tree
Showing 5 changed files with 177 additions and 114 deletions.
2 changes: 1 addition & 1 deletion nextest-runner/src/reporter/aggregator.rs
Original file line number Diff line number Diff line change
Expand Up @@ -202,7 +202,7 @@ impl<'cfg> MetadataJunit<'cfg> {
//
// testsuite.add_testcase(testcase);
}
TestEventKind::RunBeginCancel { .. } => {}
TestEventKind::RunBeginCancel { .. } | TestEventKind::RunBeginKill { .. } => {}
TestEventKind::RunFinished {
run_id,
start_time,
Expand Down
53 changes: 47 additions & 6 deletions nextest-runner/src/reporter/displayer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -460,8 +460,9 @@ impl ProgressBarState {
// continue to output to it.
self.hidden_run_paused = false;
}
TestEventKind::RunBeginCancel { .. } => {
progress_bar.set_prefix(progress_bar_cancel_prefix(styles));
TestEventKind::RunBeginCancel { reason, .. }
| TestEventKind::RunBeginKill { reason, .. } => {
progress_bar.set_prefix(progress_bar_cancel_prefix(*reason, styles));
}
_ => {}
}
Expand Down Expand Up @@ -540,17 +541,25 @@ impl<'a> TestReporter<'a> {
}
}

fn progress_bar_cancel_prefix(styles: &Styles) -> String {
format!("{:>12}", "Cancelling".style(styles.fail))
fn progress_bar_cancel_prefix(reason: CancelReason, styles: &Styles) -> String {
let status = match reason {
CancelReason::SetupScriptFailure
| CancelReason::TestFailure
| CancelReason::ReportError
| CancelReason::Signal
| CancelReason::Interrupt => "Cancelling",
CancelReason::SecondSignal => "Killing",
};
format!("{:>12}", status.style(styles.fail))
}

fn progress_bar_prefix(
run_stats: &RunStats,
cancel_reason: Option<CancelReason>,
styles: &Styles,
) -> String {
if cancel_reason.is_some() {
return progress_bar_cancel_prefix(styles);
if let Some(reason) = cancel_reason {
return progress_bar_cancel_prefix(reason, styles);
}

let style = if run_stats.has_failures() {
Expand Down Expand Up @@ -972,6 +981,38 @@ impl<'a> TestReporterImpl<'a> {
}
writeln!(writer)?;
}
TestEventKind::RunBeginKill {
setup_scripts_running,
running,
reason,
} => {
self.cancel_status = self.cancel_status.max(Some(*reason));

write!(
writer,
"{:>12} due to {}",
"Killing".style(self.styles.fail),
reason.to_static_str().style(self.styles.fail)
)?;

// At the moment, we can have either setup scripts or tests running, but not both.
if *setup_scripts_running > 0 {
let s = plural::setup_scripts_str(*setup_scripts_running);
write!(
writer,
": {} {s} still running",
setup_scripts_running.style(self.styles.count),
)?;
} else if *running > 0 {
let tests_str = plural::tests_str(*running);
write!(
writer,
": {} {tests_str} still running",
running.style(self.styles.count),
)?;
}
writeln!(writer)?;
}
TestEventKind::RunPaused {
setup_scripts_running,
running,
Expand Down
16 changes: 16 additions & 0 deletions nextest-runner/src/reporter/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -257,6 +257,18 @@ pub enum TestEventKind<'a> {
reason: CancelReason,
},

/// A forcible kill was requested due to receiving a signal.
RunBeginKill {
/// The number of setup scripts still running.
setup_scripts_running: usize,

/// The number of tests still running.
running: usize,

/// The reason this run was killed.
reason: CancelReason,
},

/// A SIGTSTP event was received and the run was paused.
RunPaused {
/// The number of setup scripts running.
Expand Down Expand Up @@ -777,6 +789,9 @@ pub enum CancelReason {

/// An interrupt (on Unix, Ctrl-C) was received.
Interrupt,

/// A second signal was received, and the run is being forcibly killed.
SecondSignal,
}

impl CancelReason {
Expand All @@ -787,6 +802,7 @@ impl CancelReason {
CancelReason::ReportError => "reporting error",
CancelReason::Signal => "signal",
CancelReason::Interrupt => "interrupt",
CancelReason::SecondSignal => "second signal",
}
}
}
Expand Down
207 changes: 100 additions & 107 deletions nextest-runner/src/runner/dispatcher.rs
Original file line number Diff line number Diff line change
Expand Up @@ -635,14 +635,7 @@ where
SignalEvent::Shutdown(event) => {
let signal_count = self.increment_signal_count();
let req = signal_count.to_request(event);

let cancel_reason = match event {
#[cfg(unix)]
ShutdownEvent::Hangup | ShutdownEvent::Term | ShutdownEvent::Quit => {
CancelReason::Signal
}
ShutdownEvent::Interrupt => CancelReason::Interrupt,
};
let cancel_reason = event_to_cancel_reason(event);

self.begin_cancel(cancel_reason, CancelEvent::Signal(req))
}
Expand Down Expand Up @@ -728,8 +721,19 @@ where
///
/// Returns the corresponding `HandleEventResponse`.
fn begin_cancel(&mut self, reason: CancelReason, event: CancelEvent) -> HandleEventResponse {
// TODO: combine reason and event?
if self.cancel_state < Some(reason) {
// TODO: combine reason and event? The Twice block ignoring the event
// seems to indicate a data modeling issue.
if event == CancelEvent::Signal(ShutdownRequest::Twice) {
// Forcibly kill child processes in the case of a second shutdown
// signal.
self.basic_callback(TestEventKind::RunBeginKill {
setup_scripts_running: self.setup_scripts_running(),
running: self.running(),
// This is always a second signal.
reason: CancelReason::SecondSignal,
});
HandleEventResponse::Cancel(event)
} else if self.cancel_state < Some(reason) {
self.cancel_state = Some(reason);
self.basic_callback(TestEventKind::RunBeginCancel {
setup_scripts_running: self.setup_scripts_running(),
Expand Down Expand Up @@ -757,6 +761,14 @@ where
}
}

fn event_to_cancel_reason(event: ShutdownEvent) -> CancelReason {
match event {
#[cfg(unix)]
ShutdownEvent::Hangup | ShutdownEvent::Term | ShutdownEvent::Quit => CancelReason::Signal,
ShutdownEvent::Interrupt => CancelReason::Interrupt,
}
}

#[derive(Clone, Debug)]
struct ContextSetupScript<'a> {
id: ScriptId,
Expand Down Expand Up @@ -877,123 +889,104 @@ mod tests {
"expected report"
);
{
let events = events.lock().unwrap();
let mut events = events.lock().unwrap();
assert_eq!(events.len(), 1, "expected 1 event");
let event = events.pop().unwrap();
let TestEventKind::RunBeginCancel {
setup_scripts_running,
running,
reason,
} = &events[0].kind
} = event.kind
else {
panic!("expected RunBeginCancel event, found {:?}", events[0].kind);
panic!("expected RunBeginCancel event, found {:?}", event.kind);
};
assert_eq!(
*setup_scripts_running, 0,
"expected 0 setup scripts running"
);
assert_eq!(*running, 0, "expected 0 tests running");
assert_eq!(*reason, CancelReason::ReportError, "expected report error");
assert_eq!(setup_scripts_running, 0, "expected 0 setup scripts running");
assert_eq!(running, 0, "expected 0 tests running");
assert_eq!(reason, CancelReason::ReportError, "expected report error");
}

// Send another report error, ensuring it's ignored.
let response = cx.handle_event(InternalEvent::ReportCancel);
assert_noop(response, &events, 1);

// Cancel with a signal.
#[cfg(unix)]
let signal_event_count = {
let response = cx.handle_event(InternalEvent::Signal(SignalEvent::Shutdown(
ShutdownEvent::Hangup,
)));
assert_eq!(
response,
HandleEventResponse::Cancel(CancelEvent::Signal(ShutdownRequest::Once(
ShutdownEvent::Hangup
))),
"expected cancellation"
);
{
let events = events.lock().unwrap();
assert_eq!(events.len(), 2, "expected 2 events");
let TestEventKind::RunBeginCancel {
setup_scripts_running,
running,
reason,
} = &events[1].kind
else {
panic!("expected RunBeginCancel event, found {:?}", events[1].kind);
};
assert_noop(response, &events);

// The rules:
// * Any one signal will cause that signal.
// * Any two signals received will cause a SIGKILL.
// * After a signal is received, any less-important cancel-worthy events
// are ignored.
//
// Interestingly, this state machine appears to function on Windows too
// (though of course the only variant is an Interrupt so this only runs
// one iteration.) Should it be different? No compelling reason to be
// yet.
for sig1 in ShutdownEvent::ALL_VARIANTS {
for sig2 in ShutdownEvent::ALL_VARIANTS {
eprintln!("** testing {:?} -> {:?}", sig1, sig2);
// Separate test for each signal to avoid mixing up state.
let mut cx = cx.clone();

// First signal.
let response = cx.handle_event(InternalEvent::Signal(SignalEvent::Shutdown(*sig1)));
assert_eq!(
*setup_scripts_running, 0,
"expected 0 setup scripts running"
response,
HandleEventResponse::Cancel(CancelEvent::Signal(ShutdownRequest::Once(*sig1))),
"expected Once"
);
assert_eq!(*running, 0, "expected 0 tests running");
assert_eq!(*reason, CancelReason::Signal, "expected signal");
}

// Send another signal, ensuring it's ignored.
let response = cx.handle_event(InternalEvent::Signal(SignalEvent::Shutdown(
ShutdownEvent::Hangup,
)));
assert_noop(response, &events, 2);

// Send a report error, ensuring it's ignored.
let response = cx.handle_event(InternalEvent::ReportCancel);
assert_noop(response, &events, 2);
{
let mut events = events.lock().unwrap();
assert_eq!(events.len(), 1, "expected 1 event");
let event = events.pop().unwrap();
let TestEventKind::RunBeginCancel {
setup_scripts_running,
running,
reason,
} = event.kind
else {
panic!("expected RunBeginCancel event, found {:?}", event.kind);
};
assert_eq!(setup_scripts_running, 0, "expected 0 setup scripts running");
assert_eq!(running, 0, "expected 0 tests running");
assert_eq!(reason, event_to_cancel_reason(*sig1), "expected signal");
}

1
};
#[cfg(not(unix))]
let signal_event_count = 0;
// Another report error, ensuring it's ignored.
let response = cx.handle_event(InternalEvent::ReportCancel);
assert_noop(response, &events);

// Cancel with an interrupt.
let response = cx.handle_event(InternalEvent::Signal(SignalEvent::Shutdown(
ShutdownEvent::Interrupt,
)));
assert_eq!(
response,
if signal_event_count == 0 {
// On Windows, this is the first signal.
HandleEventResponse::Cancel(CancelEvent::Signal(ShutdownRequest::Once(
ShutdownEvent::Interrupt,
)))
} else {
// On Unix, this is the second signal.
HandleEventResponse::Cancel(CancelEvent::Signal(ShutdownRequest::Twice))
},
"expected cancellation"
);
{
let events = events.lock().unwrap();
assert_eq!(events.len(), 2 + signal_event_count, "expected event count");
let TestEventKind::RunBeginCancel {
setup_scripts_running,
running,
reason,
} = &events[1 + signal_event_count].kind
else {
panic!(
"expected RunBeginCancel event, found {:?}",
events[1 + signal_event_count].kind
// Second signal.
let response = cx.handle_event(InternalEvent::Signal(SignalEvent::Shutdown(*sig2)));
assert_eq!(
response,
HandleEventResponse::Cancel(CancelEvent::Signal(ShutdownRequest::Twice)),
"expected kill"
);
};
assert_eq!(
*setup_scripts_running, 0,
"expected 0 setup scripts running"
);
assert_eq!(*running, 0, "expected 0 tests running");
assert_eq!(*reason, CancelReason::Interrupt, "expected interrupt");
{
let mut events = events.lock().unwrap();
assert_eq!(events.len(), 1, "expected 1 events");
let event = events.pop().unwrap();
let TestEventKind::RunBeginKill {
setup_scripts_running,
running,
reason,
} = event.kind
else {
panic!("expected RunBeginKill event, found {:?}", event.kind);
};
assert_eq!(setup_scripts_running, 0, "expected 0 setup scripts running");
assert_eq!(running, 0, "expected 0 tests running");
assert_eq!(reason, CancelReason::SecondSignal, "expected second signal");
}

// Another report error, ensuring it's ignored.
let response = cx.handle_event(InternalEvent::ReportCancel);
assert_noop(response, &events);
}
}
}

#[track_caller]
fn assert_noop(
response: HandleEventResponse,
events: &Mutex<Vec<TestEvent<'_>>>,
event_count: usize,
) {
fn assert_noop(response: HandleEventResponse, events: &Mutex<Vec<TestEvent<'_>>>) {
assert_eq!(response, HandleEventResponse::None, "expected no response");
let events = events.lock().unwrap();
assert_eq!(events.len(), event_count, "expected no new events");
assert_eq!(events.lock().unwrap().len(), 0, "expected no new events");
}
}
Loading

0 comments on commit c6d034b

Please sign in to comment.