From 16272d7fc6d5f5bf72fe8f4deb5216a54d8a37e8 Mon Sep 17 00:00:00 2001 From: Mike Zeller Date: Tue, 10 Dec 2024 21:46:56 +0000 Subject: [PATCH] [spr] changes to main this commit is based on Created using spr 1.3.6-beta.1 [skip ci] --- Cargo.lock | 15 ++ Cargo.toml | 3 + openapi/sled-agent.json | 42 +++++ sled-agent/Cargo.toml | 1 + sled-agent/api/src/lib.rs | 16 ++ sled-agent/src/http_entrypoints.rs | 34 +++- sled-agent/src/sim/http_entrypoints.rs | 12 ++ sled-agent/src/sled_agent.rs | 29 ++- sled-agent/src/support_bundle/mod.rs | 1 - sled-diagnostics/.gitignore | 1 + sled-diagnostics/Cargo.toml | 17 ++ sled-diagnostics/src/contract.rs | 172 ++++++++++++++++++ sled-diagnostics/src/contract_stub.rs | 18 ++ sled-diagnostics/src/lib.rs | 104 +++++++++++ .../src}/queries.rs | 138 +++++++------- 15 files changed, 519 insertions(+), 84 deletions(-) create mode 100644 sled-diagnostics/.gitignore create mode 100644 sled-diagnostics/Cargo.toml create mode 100644 sled-diagnostics/src/contract.rs create mode 100644 sled-diagnostics/src/contract_stub.rs create mode 100644 sled-diagnostics/src/lib.rs rename {sled-agent/src/support_bundle => sled-diagnostics/src}/queries.rs (69%) diff --git a/Cargo.lock b/Cargo.lock index 71ead293b1..03dcab43ee 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7264,6 +7264,7 @@ dependencies = [ "sled-agent-api", "sled-agent-client", "sled-agent-types", + "sled-diagnostics", "sled-hardware", "sled-hardware-types", "sled-storage", @@ -10734,6 +10735,20 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-diagnostics" +version = "0.1.0" +dependencies = [ + "cfg-if", + "fs-err", + "futures", + "libc", + "omicron-workspace-hack", + "slog", + "thiserror", + "tokio", +] + [[package]] name = "sled-hardware" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index c0f4be8bae..c9fc5e300d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + 
"sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -240,6 +241,7 @@ default-members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -596,6 +598,7 @@ sled = "=0.34.7" sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } sled-agent-types = { path = "sled-agent/types" } +sled-diagnostics = { path = "sled-diagnostics" } sled-hardware = { path = "sled-hardware" } sled-hardware-types = { path = "sled-hardware/types" } sled-storage = { path = "sled-storage" } diff --git a/openapi/sled-agent.json b/openapi/sled-agent.json index 14a4c92692..90c169e579 100644 --- a/openapi/sled-agent.json +++ b/openapi/sled-agent.json @@ -715,6 +715,48 @@ } } }, + "/support/pargs-info": { + "get": { + "operationId": "support_pargs_info", + "responses": { + "200": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, + "/support/pstack-info": { + "get": { + "operationId": "support_pstack_info", + "responses": { + "200": { + "description": "", + "content": { + "*/*": { + "schema": {} + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/support/zoneadm-info": { "get": { "operationId": "support_zoneadm_info", diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 10b4ba1cdb..88c758dc31 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -81,6 +81,7 @@ sha3.workspace = true sled-agent-api.workspace = true sled-agent-client.workspace = true sled-agent-types.workspace = true +sled-diagnostics.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true diff --git a/sled-agent/api/src/lib.rs 
b/sled-agent/api/src/lib.rs index 634640079a..34d519012e 100644 --- a/sled-agent/api/src/lib.rs +++ b/sled-agent/api/src/lib.rs @@ -609,6 +609,22 @@ pub trait SledAgentApi { async fn support_dladm_info( request_context: RequestContext, ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/support/pargs-info", + }] + async fn support_pargs_info( + request_context: RequestContext, + ) -> Result, HttpError>; + + #[endpoint { + method = GET, + path = "/support/pstack-info", + }] + async fn support_pstack_info( + request_context: RequestContext, + ) -> Result, HttpError>; } #[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 844e13151a..0148106ab6 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,7 +6,6 @@ use super::sled_agent::SledAgent; use crate::sled_agent::Error as SledAgentError; -use crate::support_bundle::queries::SupportBundleCommandHttpOutput; use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -53,6 +52,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, StorageLimit, ZoneBundleId, ZoneBundleMetadata, }; +use sled_diagnostics::SledDiagnosticsCommandHttpOutput; use std::collections::BTreeMap; type SledApiDescription = ApiDescription; @@ -902,4 +902,36 @@ impl SledAgentApi for SledAgentImpl { Ok(HttpResponseOk(FreeformBody(output.into()))) } + + async fn support_pargs_info( + request_context: RequestContext, + ) -> Result, HttpError> { + let sa = request_context.context(); + let output = sa + .support_pargs_info() + .await + .into_iter() + .map(|cmd| cmd.get_output()) + .collect::>() + .as_slice() + .join("\n\n"); + + Ok(HttpResponseOk(FreeformBody(output.into()))) + } + + async fn support_pstack_info( + request_context: RequestContext, + ) -> Result, HttpError> { + let sa = 
request_context.context(); + let output = sa + .support_pstack_info() + .await + .into_iter() + .map(|cmd| cmd.get_output()) + .collect::>() + .as_slice() + .join("\n\n"); + + Ok(HttpResponseOk(FreeformBody(output.into()))) + } } diff --git a/sled-agent/src/sim/http_entrypoints.rs b/sled-agent/src/sim/http_entrypoints.rs index 2d23f9150b..72a45660eb 100644 --- a/sled-agent/src/sim/http_entrypoints.rs +++ b/sled-agent/src/sim/http_entrypoints.rs @@ -653,6 +653,18 @@ impl SledAgentApi for SledAgentSimImpl { ) -> Result, HttpError> { method_unimplemented() } + + async fn support_pargs_info( + _request_context: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } + + async fn support_pstack_info( + _request_context: RequestContext, + ) -> Result, HttpError> { + method_unimplemented() + } } fn method_unimplemented() -> Result { diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 80dbe72ea3..d0a653268e 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -19,10 +19,6 @@ use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::StorageMonitorHandle; -use crate::support_bundle::queries::{ - dladm_info, ipadm_info, zoneadm_info, SupportBundleCmdError, - SupportBundleCmdOutput, -}; use crate::support_bundle::storage::SupportBundleManager; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; @@ -76,6 +72,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, PriorityOrder, StorageLimit, ZoneBundleMetadata, }; +use sled_diagnostics::{SledDiagnosticsCmdError, SledDiagnosticsCmdOutput}; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; @@ -1367,20 +1364,32 @@ impl SledAgent { pub(crate) async fn 
support_zoneadm_info( &self, - ) -> Result { - zoneadm_info().await + ) -> Result { + sled_diagnostics::zoneadm_info().await } pub(crate) async fn support_ipadm_info( &self, - ) -> Vec> { - ipadm_info().await + ) -> Vec> { + sled_diagnostics::ipadm_info().await } pub(crate) async fn support_dladm_info( &self, - ) -> Vec> { - dladm_info().await + ) -> Vec> { + sled_diagnostics::dladm_info().await + } + + pub(crate) async fn support_pargs_info( + &self, + ) -> Vec> { + sled_diagnostics::pargs_oxide_processes(&self.log).await + } + + pub(crate) async fn support_pstack_info( + &self, + ) -> Vec> { + sled_diagnostics::pstack_oxide_processes(&self.log).await } } diff --git a/sled-agent/src/support_bundle/mod.rs b/sled-agent/src/support_bundle/mod.rs index 314edfaec8..a1c4942751 100644 --- a/sled-agent/src/support_bundle/mod.rs +++ b/sled-agent/src/support_bundle/mod.rs @@ -2,5 +2,4 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-pub mod queries; pub mod storage; diff --git a/sled-diagnostics/.gitignore b/sled-diagnostics/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/sled-diagnostics/.gitignore @@ -0,0 +1 @@ +/target diff --git a/sled-diagnostics/Cargo.toml b/sled-diagnostics/Cargo.toml new file mode 100644 index 0000000000..9e10dfe1c7 --- /dev/null +++ b/sled-diagnostics/Cargo.toml @@ -0,0 +1,17 @@ +[package] +name = "sled-diagnostics" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +cfg-if.workspace = true +fs-err.workspace = true +futures.workspace = true +libc.workspace = true +omicron-workspace-hack.workspace = true +slog.workspace = true +thiserror.workspace = true +tokio = { workspace = true, features = ["full"] } diff --git a/sled-diagnostics/src/contract.rs b/sled-diagnostics/src/contract.rs new file mode 100644 index 0000000000..8b090b3193 --- /dev/null +++ b/sled-diagnostics/src/contract.rs @@ -0,0 +1,172 @@ +use fs_err as fs; +use libc::{c_char, c_int, c_void, pid_t}; +use slog::{warn, Logger}; +use thiserror::Error; + +use std::{ + collections::BTreeSet, + ffi::{CStr, CString}, + os::fd::AsRawFd, + path::Path, +}; + +const CT_ALL: &str = "/system/contract/all"; +const OXIDE_FMRI: &str = "svc:/oxide/"; +const CTD_ALL: i32 = 2; + +#[allow(non_camel_case_types)] +type ct_stathdl_t = *mut c_void; + +#[link(name = "contract")] +extern "C" { + fn ct_status_read( + fd: c_int, + detail: c_int, + stathdlp: *mut ct_stathdl_t, + ) -> c_int; + fn ct_status_free(stathdlp: ct_stathdl_t); + fn ct_status_get_id(stathdlp: ct_stathdl_t) -> i32; + fn ct_pr_status_get_members( + stathdlp: ct_stathdl_t, + pidpp: *mut *mut pid_t, + n: *mut u32, + ) -> c_int; + fn ct_pr_status_get_svc_fmri( + stathdlp: ct_stathdl_t, + fmri: *mut *mut c_char, + ) -> c_int; +} + +#[derive(Error, Debug)] +pub enum ContractError { + #[error(transparent)] + FileIo(#[from] std::io::Error), + #[error( + "Failed to call ct_pr_status_get_svc_fmri for 
contract {ctid}: {error}" + )] + Fmri { ctid: i32, error: std::io::Error }, + #[error( + "Failed to call ct_pr_status_get_members for contract {ctid}: {error}" + )] + Members { ctid: i32, error: std::io::Error }, + #[error("ct_status_read returned successfully but handed back a null ptr for {0}")] + Null(std::path::PathBuf), + #[error("Failed to call ct_status_read on {path}: {error}")] + StatusRead { path: std::path::PathBuf, error: std::io::Error }, +} + +pub struct ContractStatus { + handle: ct_stathdl_t, +} + +impl Drop for ContractStatus { + fn drop(&mut self) { + unsafe { ct_status_free(self.handle) }; + } +} + +macro_rules! libcall_io { + ($fn: ident ( $($arg: expr), * $(,)*) ) => {{ + let res = unsafe { $fn($($arg, )*) }; + if res == 0 { + Ok(res) + } else { + Err(std::io::Error::from_raw_os_error(res)) + } + }}; + } + +impl ContractStatus { + fn new(contract_status: &Path) -> Result { + let file = fs::File::open(contract_status)?; + let mut handle: ct_stathdl_t = std::ptr::null_mut(); + libcall_io!(ct_status_read(file.as_raw_fd(), CTD_ALL, &mut handle,)) + .map_err(|error| ContractError::StatusRead { + path: contract_status.to_path_buf(), + error, + })?; + + // We don't ever expect the system to hand back a null ptr when + // returning success but let's be extra cautious anyways. 
+ if handle.is_null() { + return Err(ContractError::Null(contract_status.to_path_buf())); + } + + Ok(Self { handle }) + } + + fn get_members(&self) -> Result<&[i32], ContractError> { + let mut numpids = 0; + let mut pids: *mut pid_t = std::ptr::null_mut(); + + let pids = { + libcall_io!(ct_pr_status_get_members( + self.handle, + &mut pids, + &mut numpids, + )) + .map_err(|error| { + let ctid = unsafe { ct_status_get_id(self.handle) }; + ContractError::Members { ctid, error } + })?; + + unsafe { + if pids.is_null() { + &[] + } else { + std::slice::from_raw_parts(pids, numpids as usize) + } + } + }; + + Ok(pids) + } + + fn get_fmri(&self) -> Result, ContractError> { + // The lifetime of this string is tied to the lifetime of the status + // handle itself and will be cleaned up when the handle is freed. + let mut ptr: *mut c_char = std::ptr::null_mut(); + libcall_io!(ct_pr_status_get_svc_fmri(self.handle, &mut ptr)).map_err( + |error| { + let ctid = unsafe { ct_status_get_id(self.handle) }; + ContractError::Fmri { ctid, error } + }, + )?; + + if ptr.is_null() { + return Ok(None); + } + + let cstr = unsafe { CStr::from_ptr(ptr) }; + Ok(Some(cstr.to_owned())) + } +} + +pub fn find_oxide_pids(log: &Logger) -> Result, ContractError> { + let mut pids = BTreeSet::new(); + let ents = fs::read_dir(CT_ALL)?; + for ct in ents { + let ctid = ct?; + let mut path = ctid.path(); + path.push("status"); + + let status = match ContractStatus::new(path.as_path()) { + Ok(status) => status, + Err(e) => { + // There's a race between the time we find the contracts to the + // time we attempt to read the contract's status. We can safely + // skip all of the errors for diagnostics purposes but we should + // leave a log in our wake. 
+ warn!(log, "Failed to read contract ({:?}): {}", path, e); + continue; + } + }; + + let fmri = status.get_fmri()?.unwrap_or_default(); + if fmri.to_string_lossy().starts_with(OXIDE_FMRI) { + pids.extend(status.get_members()?); + } + } + + Ok(pids) +} diff --git a/sled-diagnostics/src/contract_stub.rs b/sled-diagnostics/src/contract_stub.rs new file mode 100644 index 0000000000..9637c3486d --- /dev/null +++ b/sled-diagnostics/src/contract_stub.rs @@ -0,0 +1,18 @@ +//! Stub implementation for platforms without libcontract(3lib). + +use std::collections::BTreeSet; + +use slog::{warn, Logger}; +use thiserror::Error; + +#[derive(Error, Debug)] +pub enum ContractError {} + +pub fn find_oxide_pids(log: &Logger) -> Result, ContractError> { + warn!( + log, + "Unable to find oxide pids on a non illumos platform, \ + returning empty set" + ); + Ok(BTreeSet::new()) +} diff --git a/sled-diagnostics/src/lib.rs b/sled-diagnostics/src/lib.rs new file mode 100644 index 0000000000..cbb3a5a0d0 --- /dev/null +++ b/sled-diagnostics/src/lib.rs @@ -0,0 +1,104 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Diagnostics for an Oxide sled that exposes common support commands. + +use futures::{stream::FuturesUnordered, StreamExt}; +use slog::Logger; + +cfg_if::cfg_if! { + if #[cfg(target_os = "illumos")] { + mod contract; + } else { + mod contract_stub; + use contract_stub as contract; + } +} + +mod queries; +pub use crate::queries::{ + SledDiagnosticsCmdError, SledDiagnosticsCmdOutput, + SledDiagnosticsCommandHttpOutput, +}; +use queries::*; + +/// List all zones on a sled. +pub async fn zoneadm_info( +) -> Result { + execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await +} + +/// Retrieve various `ipadm` command output for the system. 
+pub async fn ipadm_info( +) -> Vec> { + [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} + +/// Retrieve various `dladm` command output for the system. +pub async fn dladm_info( +) -> Vec> { + [ + dladm_show_phys(), + dladm_show_ether(), + dladm_show_link(), + dladm_show_vnic(), + dladm_show_linkprop(), + ] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} + +pub async fn pargs_oxide_processes( + log: &Logger, +) -> Vec> { + // In a diagnostics context we care about looping over every pid we find, + // but on failure we should just return a single error in a vec that + // represents the entire failed operation. + let pids = match contract::find_oxide_pids(log) { + Ok(pids) => pids, + Err(e) => return vec![Err(e.into())], + }; + + pids.iter() + .map(|pid| pargs_process(*pid)) + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} + +pub async fn pstack_oxide_processes( + log: &Logger, +) -> Vec> { + // In a diagnostics context we care about looping over every pid we find, + // but on failure we should just return a single error in a vec that + // represents the entire failed operation. 
+ let pids = match contract::find_oxide_pids(log) { + Ok(pids) => pids, + Err(e) => return vec![Err(e.into())], + }; + + pids.iter() + .map(|pid| pstack_process(*pid)) + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} diff --git a/sled-agent/src/support_bundle/queries.rs b/sled-diagnostics/src/queries.rs similarity index 69% rename from sled-agent/src/support_bundle/queries.rs rename to sled-diagnostics/src/queries.rs index 2313d9e08d..2f2b135f0d 100644 --- a/sled-agent/src/support_bundle/queries.rs +++ b/sled-diagnostics/src/queries.rs @@ -1,18 +1,37 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Wrapper for command execution with timeout. + use std::{process::Command, time::Duration}; -use futures::{stream::FuturesUnordered, StreamExt}; -use illumos_utils::{dladm::DLADM, zone::IPADM, PFEXEC, ZONEADM}; use thiserror::Error; use tokio::io::AsyncReadExt; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +#[cfg(target_os = "illumos")] +use crate::contract::ContractError; + +#[cfg(not(target_os = "illumos"))] +use crate::contract_stub::ContractError; + +const DLADM: &str = "/usr/sbin/dladm"; +const IPADM: &str = "/usr/sbin/ipadm"; +const PFEXEC: &str = "/usr/bin/pfexec"; +const PSTACK: &str = "/usr/bin/pstack"; +const PARGS: &str = "/usr/bin/pargs"; +const ZONEADM: &str = "/usr/sbin/zoneadm"; -pub trait SupportBundleCommandHttpOutput { +pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); + +pub trait SledDiagnosticsCommandHttpOutput { fn get_output(self) -> String; } #[derive(Error, Debug)] -pub enum SupportBundleCmdError { +pub enum SledDiagnosticsCmdError { + #[error("libcontract error: {0}")] + Contract(#[from] ContractError), #[error("Failed to duplicate pipe for command [{command}]: {error}")] Dup 
{ command: String, error: std::io::Error }, #[error("Failed to proccess output for command [{command}]: {error}")] @@ -32,13 +51,13 @@ pub enum SupportBundleCmdError { } #[derive(Debug)] -pub struct SupportBundleCmdOutput { +pub struct SledDiagnosticsCmdOutput { pub command: String, pub stdio: String, pub exit_status: String, } -impl std::fmt::Display for SupportBundleCmdOutput { +impl std::fmt::Display for SledDiagnosticsCmdOutput { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "Command executed [{}]:", self.command)?; writeln!(f, " ==== stdio ====\n{}", self.stdio)?; @@ -46,8 +65,8 @@ impl std::fmt::Display for SupportBundleCmdOutput { } } -impl SupportBundleCommandHttpOutput - for Result +impl SledDiagnosticsCommandHttpOutput + for Result { fn get_output(self) -> String { match self { @@ -76,16 +95,19 @@ fn command_to_string(command: &Command) -> String { /// and stderr as they occur. async fn execute( cmd: Command, -) -> Result { +) -> Result { let cmd_string = command_to_string(&cmd); let (sender, mut rx) = tokio::net::unix::pipe::pipe().map_err(|e| { - SupportBundleCmdError::Pipe { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Pipe { command: cmd_string.clone(), error: e } })?; let pipe = sender.into_nonblocking_fd().map_err(|e| { - SupportBundleCmdError::OwnedFd { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::OwnedFd { + command: cmd_string.clone(), + error: e, + } })?; let pipe_dup = pipe.try_clone().map_err(|e| { - SupportBundleCmdError::Dup { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Dup { command: cmd_string.clone(), error: e } })?; // TODO MTZ: We may eventually want to reuse some of the process contract @@ -95,9 +117,8 @@ async fn execute( cmd.stdout(pipe); cmd.stderr(pipe_dup); - let mut child = cmd.spawn().map_err(|e| SupportBundleCmdError::Spawn { - command: cmd_string.clone(), - error: e, + let mut child = cmd.spawn().map_err(|e| { + 
SledDiagnosticsCmdError::Spawn { command: cmd_string.clone(), error: e } })?; // NB: This drop call is load-bearing and prevents a deadlock. The command // struct holds onto the write half of the pipe preventing the read side @@ -107,130 +128,103 @@ async fn execute( let mut stdio = String::new(); rx.read_to_string(&mut stdio).await.map_err(|e| { - SupportBundleCmdError::Output { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Output { + command: cmd_string.clone(), + error: e, + } })?; let exit_status = child.wait().await.map(|es| format!("{es}")).map_err(|e| { - SupportBundleCmdError::Wait { + SledDiagnosticsCmdError::Wait { command: cmd_string.clone(), error: e, } })?; - Ok(SupportBundleCmdOutput { command: cmd_string, stdio, exit_status }) + Ok(SledDiagnosticsCmdOutput { command: cmd_string, stdio, exit_status }) } /// Spawn a command that's allowed to execute within a given time limit. -async fn execute_command_with_timeout( +pub async fn execute_command_with_timeout( command: Command, duration: Duration, -) -> Result { +) -> Result { let cmd_string = command_to_string(&command); let tokio_command = execute(command); match tokio::time::timeout(duration, tokio_command).await { Ok(res) => res, - Err(_elapsed) => Err(SupportBundleCmdError::Timeout { + Err(_elapsed) => Err(SledDiagnosticsCmdError::Timeout { command: cmd_string, duration, }), } } -fn zoneadm_list() -> Command { +pub fn zoneadm_list() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(ZONEADM).arg("list").arg("-cip"); cmd } -fn ipadm_show_interface() -> Command { +pub fn ipadm_show_interface() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-if"); cmd } -fn ipadm_show_addr() -> Command { +pub fn ipadm_show_addr() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-addr"); cmd } -fn ipadm_show_prop() -> Command { +pub fn ipadm_show_prop() -> Command { 
let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-prop"); cmd } -fn dladm_show_phys() -> Command { +pub fn dladm_show_phys() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).args(["show-phys", "-m"]); cmd } -fn dladm_show_ether() -> Command { +pub fn dladm_show_ether() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-ether"); cmd } -fn dladm_show_link() -> Command { +pub fn dladm_show_link() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-link"); cmd } -fn dladm_show_vnic() -> Command { +pub fn dladm_show_vnic() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-vnic"); cmd } -fn dladm_show_linkprop() -> Command { +pub fn dladm_show_linkprop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-linkprop"); cmd } -/* - * Public API - */ - -/// List all zones on a sled. -pub async fn zoneadm_info( -) -> Result { - execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await -} - -/// Retrieve various `ipadm` command output for the system. -pub async fn ipadm_info( -) -> Vec> { - [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await +pub fn pargs_process(pid: i32) -> Command { + let mut cmd = std::process::Command::new(PFEXEC); + cmd.env_clear().arg(PARGS).arg("-ae").arg(pid.to_string()); + cmd } -/// Retrieve various `dladm` command output for the system. 
-pub async fn dladm_info( -) -> Vec> { - [ - dladm_show_phys(), - dladm_show_ether(), - dladm_show_link(), - dladm_show_vnic(), - dladm_show_linkprop(), - ] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await +pub fn pstack_process(pid: i32) -> Command { + let mut cmd = std::process::Command::new(PFEXEC); + cmd.env_clear().arg(PSTACK).arg(pid.to_string()); + cmd } #[cfg(test)] @@ -246,7 +240,7 @@ mod test { match execute_command_with_timeout(command, Duration::from_millis(500)) .await { - Err(SupportBundleCmdError::Timeout { .. }) => (), + Err(SledDiagnosticsCmdError::Timeout { .. }) => (), _ => panic!("command should have timed out"), } } @@ -267,7 +261,7 @@ mod test { #[tokio::test] async fn test_command_stderr_is_correct() { let mut command = Command::new("bash"); - command.env_clear().args(&["-c", "echo oxide computer > /dev/stderr"]); + command.env_clear().args(["-c", "echo oxide computer > /dev/stderr"]); let res = execute_command_with_timeout(command, Duration::from_secs(5)) .await @@ -279,7 +273,7 @@ mod test { #[tokio::test] async fn test_command_stdout_stderr_are_interleaved() { let mut command = Command::new("bash"); - command.env_clear().args(&[ + command.env_clear().args([ "-c", "echo one > /dev/stdout \ && echo two > /dev/stderr \