Skip to content

Commit

Permalink
[nextest-runner] make Windows job object termination clearer
Browse files Browse the repository at this point in the history
In case of a Ctrl-C grace period expiring, we should make it clear to users
what's happening rather than just printing out "FAIL".
  • Loading branch information
sunshowers committed Dec 17, 2024
1 parent c612aea commit b930fa8
Show file tree
Hide file tree
Showing 9 changed files with 254 additions and 147 deletions.
6 changes: 3 additions & 3 deletions nextest-runner/src/errors.rs
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
use crate::{
cargo_config::{TargetTriple, TargetTripleSource},
config::{ConfigExperimental, CustomTestGroup, ScriptId, TestGroup},
helpers::{display_exit_status, dylib_path_envvar},
helpers::{display_exited_with, dylib_path_envvar},
redact::Redactor,
reuse_build::{ArchiveFormat, ArchiveStep},
target_runner::PlatformRunnerSource,
Expand Down Expand Up @@ -873,9 +873,9 @@ pub enum CreateTestListError {

/// Running a command to gather the list of tests failed failed with a non-zero exit code.
#[error(
"for `{binary_id}`, command `{}` exited with {}\n--- stdout:\n{}\n--- stderr:\n{}\n---",
"for `{binary_id}`, command `{}` {}\n--- stdout:\n{}\n--- stderr:\n{}\n---",
shell_words::join(command),
display_exit_status(*exit_status),
display_exited_with(*exit_status),
String::from_utf8_lossy(stdout),
String::from_utf8_lossy(stderr),
)]
Expand Down
17 changes: 10 additions & 7 deletions nextest-runner/src/helpers.rs
Original file line number Diff line number Diff line change
Expand Up @@ -296,36 +296,39 @@ impl fmt::Display for FormattedDuration {
}
}

pub(crate) fn display_exit_status(exit_status: ExitStatus) -> String {
// "exited with"/"terminated via"
pub(crate) fn display_exited_with(exit_status: ExitStatus) -> String {
match AbortStatus::extract(exit_status) {
Some(abort_status) => display_abort_status(abort_status),
None => match exit_status.code() {
Some(code) => format!("exit code {}", code),
None => "an unknown error".to_owned(),
Some(code) => format!("exited with exit code {}", code),
None => "exited with an unknown error".to_owned(),
},
}
}

/// Display the abort status.
/// Displays the abort status.
pub(crate) fn display_abort_status(abort_status: AbortStatus) -> String {
match abort_status {
#[cfg(unix)]
AbortStatus::UnixSignal(sig) => match crate::helpers::signal_str(sig) {
Some(s) => {
format!("signal {sig} (SIG{s})")
format!("aborted with signal {sig} (SIG{s})")
}
None => {
format!("signal {sig}")
format!("aborted with signal {sig}")
}
},
#[cfg(windows)]
AbortStatus::WindowsNtStatus(nt_status) => {
format!(
"code {}",
"aborted with code {}",
// TODO: pass down a style here
crate::helpers::display_nt_status(nt_status, Style::new())
)
}
#[cfg(windows)]
AbortStatus::JobObject => "terminated via job object".to_string(),
}
}

Expand Down
36 changes: 30 additions & 6 deletions nextest-runner/src/reporter/displayer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -1319,11 +1319,23 @@ impl<'a> TestReporterImpl<'a> {
// On Windows, also print out the exception if available.
#[cfg(windows)]
if let ExecutionResult::Fail {
abort_status: Some(AbortStatus::WindowsNtStatus(nt_status)),
abort_status: Some(abort_status),
leaked: _,
} = last_status.result
{
write_windows_message_line(nt_status, &self.styles, writer)?;
match abort_status {
AbortStatus::WindowsNtStatus(nt_status) => {
write_windows_message_line(nt_status, &self.styles, writer)?;
}
AbortStatus::JobObject => {
write!(
writer,
"{:>12} via {}",
"terminated".style(self.styles.fail),
"job object".style(self.styles.count),
)?;
}
}
}

Ok(())
Expand Down Expand Up @@ -1393,11 +1405,23 @@ impl<'a> TestReporterImpl<'a> {
// On Windows, also print out the exception if available.
#[cfg(windows)]
if let ExecutionResult::Fail {
abort_status: Some(AbortStatus::WindowsNtStatus(nt_status)),
abort_status: Some(abort_status),
leaked: _,
} = last_status.result
{
write_windows_message_line(nt_status, &self.styles, writer)?;
match abort_status {
AbortStatus::WindowsNtStatus(nt_status) => {
write_windows_message_line(nt_status, &self.styles, writer)?;
}
AbortStatus::JobObject => {
write!(
writer,
"{:>12} via {}",
"terminated".style(self.styles.fail),
"job object".style(self.styles.count),
)?;
}
}
}

Ok(())
Expand Down Expand Up @@ -2453,7 +2477,7 @@ fn status_str(result: ExecutionResult) -> Cow<'static, str> {
},
#[cfg(windows)]
ExecutionResult::Fail {
abort_status: Some(AbortStatus::WindowsNtStatus(_)),
abort_status: Some(AbortStatus::WindowsNtStatus(_)) | Some(AbortStatus::JobObject),
leaked: _,
} => {
// Going to print out the full error message on the following line -- just "ABORT" will
Expand Down Expand Up @@ -2488,7 +2512,7 @@ fn short_status_str(result: ExecutionResult) -> Cow<'static, str> {
},
#[cfg(windows)]
ExecutionResult::Fail {
abort_status: Some(AbortStatus::WindowsNtStatus(_)),
abort_status: Some(AbortStatus::WindowsNtStatus(_)) | Some(AbortStatus::JobObject),
leaked: _,
} => {
// Going to print out the full error message on the following line -- just "ABORT" will
Expand Down
22 changes: 2 additions & 20 deletions nextest-runner/src/reporter/error_description.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
use super::events::{AbortStatus, ExecutionResult, UnitKind};
use crate::{
errors::{ChildError, ChildStartError, ErrorList},
helpers::display_abort_status,
test_output::{ChildExecutionOutput, ChildOutput},
};
use bstr::ByteSlice;
Expand Down Expand Up @@ -152,26 +153,7 @@ struct UnitAbortDescription {

impl fmt::Display for UnitAbortDescription {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
write!(f, "process aborted")?;
match self.status {
#[cfg(unix)]
AbortStatus::UnixSignal(sig) => {
let signal_str = crate::helpers::signal_str(sig)
.map(|signal_str| format!("SIG{signal_str}"))
.unwrap_or_else(|| sig.to_string());
write!(f, " with signal {signal_str}")?;
}
#[cfg(windows)]
AbortStatus::WindowsNtStatus(exception) => {
write!(
f,
" with code {}",
// TODO: pass in bold style (probably need to not use
// fmt::Display)
crate::helpers::display_nt_status(exception, owo_colors::Style::new())
)?;
}
}
write!(f, "process {}", display_abort_status(self.status))?;
if self.leaked {
write!(f, ", and also leaked handles")?;
}
Expand Down
6 changes: 5 additions & 1 deletion nextest-runner/src/reporter/events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -741,7 +741,7 @@ pub enum ExecutionResult {
},
/// An error occurred while executing the test.
ExecFail,
/// The test was terminated due to timeout.
/// The test was terminated due to a timeout.
Timeout,
}

Expand Down Expand Up @@ -769,6 +769,10 @@ pub enum AbortStatus {
/// The test was determined to have aborted because the high bit was set on Windows.
#[cfg(windows)]
WindowsNtStatus(windows_sys::Win32::Foundation::NTSTATUS),

/// The test was terminated via job object on Windows.
#[cfg(windows)]
JobObject,
}

impl AbortStatus {
Expand Down
88 changes: 76 additions & 12 deletions nextest-runner/src/runner/executor.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
//! just a better abstraction, it also provides a better user experience (less
//! inconsistent state).
use super::HandleSignalResult;
use crate::{
config::{
EvaluatableProfile, RetryPolicy, ScriptConfig, ScriptId, SetupScriptCommand,
Expand Down Expand Up @@ -419,10 +420,14 @@ impl<'a> ExecutorContext<'a> {
}

if will_terminate {
// attempt to terminate the slow test.
// as there is a race between shutting down a slow test and its own completion
// we silently ignore errors to avoid printing false warnings.
super::os::terminate_child(
// Attempt to terminate the slow script. As there is
// a race between shutting down a slow test and its
// own completion, we silently ignore errors to
// avoid printing false warnings.
//
// The return result of terminate_child is not used
// here, since it is always marked as a timeout.
_ = super::os::terminate_child(
&cx,
&mut child,
&mut child_acc,
Expand All @@ -449,7 +454,8 @@ impl<'a> ExecutorContext<'a> {

match req {
RunUnitRequest::Signal(req) => {
handle_signal_request(
#[cfg_attr(not(windows), expect(unused_variables))]
let res = handle_signal_request(
&cx,
&mut child,
&mut child_acc,
Expand All @@ -460,6 +466,31 @@ impl<'a> ExecutorContext<'a> {
job.as_ref(),
slow_timeout.grace_period
).await;

// On Unix, the signal the process exited with
// will be picked up by child.wait. On Windows,
// termination by job object will show up as
// exit code 1 -- we need to be clearer about
// that in the UI.
//
// TODO: Can we do something useful with res on
// Unix? For example, it's possible that the
// signal we send is not the same as the signal
// the child exits with. This might be a good
// thing to store in whatever test event log we
// end up building.
#[cfg(windows)]
{
if matches!(
res,
HandleSignalResult::Terminated(super::TerminateChildResult::Killed)
) {
status = Some(ExecutionResult::Fail {
abort_status: Some(AbortStatus::JobObject),
leaked: false,
});
}
}
}
RunUnitRequest::OtherCancel => {
// Ignore non-signal cancellation requests --
Expand Down Expand Up @@ -666,10 +697,14 @@ impl<'a> ExecutorContext<'a> {
}

if will_terminate {
// Attempt to terminate the slow test. As there is a race between
// shutting down a slow test and its own completion, we silently ignore
// errors to avoid printing false warnings.
super::os::terminate_child(
// Attempt to terminate the slow test. As there is a
// race between shutting down a slow test and its
// own completion, we silently ignore errors to
// avoid printing false warnings.
//
// The return result of terminate_child is not used
// here, since it is always marked as a timeout.
_ = super::os::terminate_child(
&cx,
&mut child,
&mut child_acc,
Expand All @@ -695,7 +730,8 @@ impl<'a> ExecutorContext<'a> {

match req {
RunUnitRequest::Signal(req) => {
handle_signal_request(
#[cfg_attr(not(windows), expect(unused_variables))]
let res = handle_signal_request(
&cx,
&mut child,
&mut child_acc,
Expand All @@ -706,6 +742,31 @@ impl<'a> ExecutorContext<'a> {
job.as_ref(),
slow_timeout.grace_period,
).await;

// On Unix, the signal the process exited with
// will be picked up by child.wait. On Windows,
// termination by job object will show up as
// exit code 1 -- we need to be clearer about
// that in the UI.
//
// TODO: Can we do something useful with res on
// Unix? For example, it's possible that the
// signal we send is not the same as the signal
// the child exits with. This might be a good
// thing to store in whatever test event log we
// end up building.
#[cfg(windows)]
{
if matches!(
res,
HandleSignalResult::Terminated(super::TerminateChildResult::Killed)
) {
status = Some(ExecutionResult::Fail {
abort_status: Some(AbortStatus::JobObject),
leaked: false,
});
}
}
}
RunUnitRequest::OtherCancel => {
// Ignore non-signal cancellation requests --
Expand Down Expand Up @@ -1158,7 +1219,7 @@ async fn handle_signal_request<'a>(
req_rx: &mut UnboundedReceiver<RunUnitRequest<'a>>,
job: Option<&super::os::Job>,
grace_period: Duration,
) {
) -> HandleSignalResult {
match req {
#[cfg(unix)]
SignalRequest::Stop(sender) => {
Expand All @@ -1170,6 +1231,7 @@ async fn handle_signal_request<'a>(
// The receiver being dead probably means the main thread panicked
// or similar.
let _ = sender.send(());
HandleSignalResult::JobControl
}
#[cfg(unix)]
SignalRequest::Continue => {
Expand All @@ -1180,9 +1242,10 @@ async fn handle_signal_request<'a>(
interval_sleep.as_mut().resume();
super::os::job_control_child(child, crate::signal::JobControlEvent::Continue);
}
HandleSignalResult::JobControl
}
SignalRequest::Shutdown(event) => {
super::os::terminate_child(
let res = super::os::terminate_child(
cx,
child,
child_acc,
Expand All @@ -1193,6 +1256,7 @@ async fn handle_signal_request<'a>(
grace_period,
)
.await;
HandleSignalResult::Terminated(res)
}
}
}
Expand Down
22 changes: 22 additions & 0 deletions nextest-runner/src/runner/internal_events.rs
Original file line number Diff line number Diff line change
Expand Up @@ -249,3 +249,25 @@ impl RunnerTaskState {
}
}
}

#[derive(Clone, Copy, Debug)]
#[must_use]
pub(super) enum HandleSignalResult {
/// A job control signal was delivered.
#[cfg(unix)]
JobControl,

/// The child was terminated.
#[cfg_attr(not(windows), expect(dead_code))]
Terminated(TerminateChildResult),
}

#[derive(Clone, Copy, Debug)]
#[must_use]
pub(super) enum TerminateChildResult {
/// The child process exited without being forcibly killed.
Exited,

/// The child process was forcibly killed.
Killed,
}
Loading

0 comments on commit b930fa8

Please sign in to comment.