diff --git a/Cargo.lock b/Cargo.lock index d8a1d33a6b..4de640a93d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7264,6 +7264,7 @@ dependencies = [ "sled-agent-api", "sled-agent-client", "sled-agent-types", + "sled-diagnostics", "sled-hardware", "sled-hardware-types", "sled-storage", @@ -10733,6 +10734,15 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-diagnostics" +version = "0.1.0" +dependencies = [ + "futures", + "thiserror", + "tokio", +] + [[package]] name = "sled-hardware" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index c0f4be8bae..c9fc5e300d 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -240,6 +241,7 @@ default-members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -596,6 +598,7 @@ sled = "=0.34.7" sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } sled-agent-types = { path = "sled-agent/types" } +sled-diagnostics = { path = "sled-diagnostics" } sled-hardware = { path = "sled-hardware" } sled-hardware-types = { path = "sled-hardware/types" } sled-storage = { path = "sled-storage" } diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 10b4ba1cdb..88c758dc31 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -81,6 +81,7 @@ sha3.workspace = true sled-agent-api.workspace = true sled-agent-client.workspace = true sled-agent-types.workspace = true +sled-diagnostics.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 844e13151a..88259537d2 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,7 +6,6 @@ use super::sled_agent::SledAgent; use crate::sled_agent::Error as SledAgentError; -use crate::support_bundle::queries::SupportBundleCommandHttpOutput; use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -53,6 +52,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, StorageLimit, ZoneBundleId, ZoneBundleMetadata, }; +use sled_diagnostics::SledDiagnosticsCommandHttpOutput; use std::collections::BTreeMap; type SledApiDescription = ApiDescription; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 80dbe72ea3..b9bf703933 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -19,10 +19,6 @@ use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::StorageMonitorHandle; -use crate::support_bundle::queries::{ - dladm_info, ipadm_info, zoneadm_info, SupportBundleCmdError, - SupportBundleCmdOutput, -}; use crate::support_bundle::storage::SupportBundleManager; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; @@ -76,6 +72,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, PriorityOrder, StorageLimit, ZoneBundleMetadata, }; +use sled_diagnostics::{SledDiagnosticsCmdError, SledDiagnosticsCmdOutput}; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; @@ -1367,20 +1364,20 @@ impl SledAgent { pub(crate) async fn support_zoneadm_info( &self, - ) -> Result { - zoneadm_info().await + ) -> Result { + sled_diagnostics::zoneadm_info().await } pub(crate) async fn support_ipadm_info( &self, - ) -> Vec> { - ipadm_info().await + ) -> Vec> { + sled_diagnostics::ipadm_info().await } pub(crate) async fn support_dladm_info( &self, - ) -> Vec> { - dladm_info().await + ) -> Vec> { + sled_diagnostics::dladm_info().await } } diff --git a/sled-agent/src/support_bundle/mod.rs b/sled-agent/src/support_bundle/mod.rs index 314edfaec8..a1c4942751 100644 --- a/sled-agent/src/support_bundle/mod.rs +++ b/sled-agent/src/support_bundle/mod.rs @@ -2,5 +2,4 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -pub mod queries; pub mod storage; diff --git a/sled-diagnostics/.gitignore b/sled-diagnostics/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/sled-diagnostics/.gitignore @@ -0,0 +1 @@ +/target diff --git a/sled-diagnostics/Cargo.toml b/sled-diagnostics/Cargo.toml new file mode 100644 index 0000000000..ac2e6215b2 --- /dev/null +++ b/sled-diagnostics/Cargo.toml @@ -0,0 +1,9 @@ +[package] +name = "sled-diagnostics" +version = "0.1.0" +edition = "2021" + +[dependencies] +futures.workspace = true +thiserror.workspace = true +tokio = { workspace = true, features = ["full"] } diff --git a/sled-diagnostics/src/lib.rs b/sled-diagnostics/src/lib.rs new file mode 100644 index 0000000000..466c1ec446 --- /dev/null +++ b/sled-diagnostics/src/lib.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Diagnostics for an Oxide sled that exposes common support commands. + +use futures::{stream::FuturesUnordered, StreamExt}; + +mod queries; +pub use crate::queries::{ + SledDiagnosticsCmdError, SledDiagnosticsCmdOutput, + SledDiagnosticsCommandHttpOutput, +}; +use queries::*; + +/// List all zones on a sled. +pub async fn zoneadm_info( +) -> Result { + execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await +} + +/// Retrieve various `ipadm` command output for the system. +pub async fn ipadm_info( +) -> Vec> { + [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} + +/// Retrieve various `dladm` command output for the system. +pub async fn dladm_info( +) -> Vec> { + [ + dladm_show_phys(), + dladm_show_ether(), + dladm_show_link(), + dladm_show_vnic(), + dladm_show_linkprop(), + ] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} diff --git a/sled-agent/src/support_bundle/queries.rs b/sled-diagnostics/src/queries.rs similarity index 69% rename from sled-agent/src/support_bundle/queries.rs rename to sled-diagnostics/src/queries.rs index 2313d9e08d..9a66842cb2 100644 --- a/sled-agent/src/support_bundle/queries.rs +++ b/sled-diagnostics/src/queries.rs @@ -1,18 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Wrapper for command execution with timeout. + use std::{process::Command, time::Duration}; -use futures::{stream::FuturesUnordered, StreamExt}; -use illumos_utils::{dladm::DLADM, zone::IPADM, PFEXEC, ZONEADM}; use thiserror::Error; use tokio::io::AsyncReadExt; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +const DLADM: &str = "/usr/sbin/dladm"; +const IPADM: &str = "/usr/sbin/ipadm"; +const PFEXEC: &str = "/usr/bin/pfexec"; +const ZONEADM: &str = "/usr/sbin/zoneadm"; + +pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); -pub trait SupportBundleCommandHttpOutput { +pub trait SledDiagnosticsCommandHttpOutput { fn get_output(self) -> String; } #[derive(Error, Debug)] -pub enum SupportBundleCmdError { +pub enum SledDiagnosticsCmdError { #[error("Failed to duplicate pipe for command [{command}]: {error}")] Dup { command: String, error: std::io::Error }, #[error("Failed to proccess output for command [{command}]: {error}")] @@ -32,13 +41,13 @@ pub enum SupportBundleCmdError { } #[derive(Debug)] -pub struct SupportBundleCmdOutput { +pub struct SledDiagnosticsCmdOutput { pub command: String, pub stdio: String, pub exit_status: String, } -impl std::fmt::Display for SupportBundleCmdOutput { +impl std::fmt::Display for SledDiagnosticsCmdOutput { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "Command executed [{}]:", self.command)?; writeln!(f, " ==== stdio ====\n{}", self.stdio)?; @@ -46,8 +55,8 @@ impl std::fmt::Display for SupportBundleCmdOutput { } } -impl SupportBundleCommandHttpOutput - for Result +impl SledDiagnosticsCommandHttpOutput + for Result { fn get_output(self) -> String { match self { @@ -76,16 +85,19 @@ fn command_to_string(command: &Command) -> String { /// and stderr as they occur. async fn execute( cmd: Command, -) -> Result { +) -> Result { let cmd_string = command_to_string(&cmd); let (sender, mut rx) = tokio::net::unix::pipe::pipe().map_err(|e| { - SupportBundleCmdError::Pipe { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Pipe { command: cmd_string.clone(), error: e } })?; let pipe = sender.into_nonblocking_fd().map_err(|e| { - SupportBundleCmdError::OwnedFd { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::OwnedFd { + command: cmd_string.clone(), + error: e, + } })?; let pipe_dup = pipe.try_clone().map_err(|e| { - SupportBundleCmdError::Dup { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Dup { command: cmd_string.clone(), error: e } })?; // TODO MTZ: We may eventually want to reuse some of the process contract @@ -95,9 +107,8 @@ async fn execute( cmd.stdout(pipe); cmd.stderr(pipe_dup); - let mut child = cmd.spawn().map_err(|e| SupportBundleCmdError::Spawn { - command: cmd_string.clone(), - error: e, + let mut child = cmd.spawn().map_err(|e| { + SledDiagnosticsCmdError::Spawn { command: cmd_string.clone(), error: e } })?; // NB: This drop call is load-bearing and prevents a deadlock. The command // struct holds onto the write half of the pipe preventing the read side @@ -107,85 +118,88 @@ async fn execute( let mut stdio = String::new(); rx.read_to_string(&mut stdio).await.map_err(|e| { - SupportBundleCmdError::Output { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Output { + command: cmd_string.clone(), + error: e, + } })?; let exit_status = child.wait().await.map(|es| format!("{es}")).map_err(|e| { - SupportBundleCmdError::Wait { + SledDiagnosticsCmdError::Wait { command: cmd_string.clone(), error: e, } })?; - Ok(SupportBundleCmdOutput { command: cmd_string, stdio, exit_status }) + Ok(SledDiagnosticsCmdOutput { command: cmd_string, stdio, exit_status }) } /// Spawn a command that's allowed to execute within a given time limit. -async fn execute_command_with_timeout( +pub async fn execute_command_with_timeout( command: Command, duration: Duration, -) -> Result { +) -> Result { let cmd_string = command_to_string(&command); let tokio_command = execute(command); match tokio::time::timeout(duration, tokio_command).await { Ok(res) => res, - Err(_elapsed) => Err(SupportBundleCmdError::Timeout { + Err(_elapsed) => Err(SledDiagnosticsCmdError::Timeout { command: cmd_string, duration, }), } } -fn zoneadm_list() -> Command { +pub fn zoneadm_list() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(ZONEADM).arg("list").arg("-cip"); cmd } -fn ipadm_show_interface() -> Command { +pub fn ipadm_show_interface() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-if"); cmd } -fn ipadm_show_addr() -> Command { +pub fn ipadm_show_addr() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-addr"); cmd } -fn ipadm_show_prop() -> Command { +pub fn ipadm_show_prop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-prop"); cmd } -fn dladm_show_phys() -> Command { +pub fn dladm_show_phys() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).args(["show-phys", "-m"]); cmd } -fn dladm_show_ether() -> Command { +pub fn dladm_show_ether() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-ether"); cmd } -fn dladm_show_link() -> Command { +pub fn dladm_show_link() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-link"); cmd } -fn dladm_show_vnic() -> Command { +pub fn dladm_show_vnic() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-vnic"); cmd } -fn dladm_show_linkprop() -> Command { +pub fn dladm_show_linkprop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-linkprop"); cmd @@ -195,44 +209,6 @@ fn dladm_show_linkprop() -> Command { * Public API */ -/// List all zones on a sled. -pub async fn zoneadm_info( -) -> Result { - execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await -} - -/// Retrieve various `ipadm` command output for the system. -pub async fn ipadm_info( -) -> Vec> { - [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await -} - -/// Retrieve various `dladm` command output for the system. -pub async fn dladm_info( -) -> Vec> { - [ - dladm_show_phys(), - dladm_show_ether(), - dladm_show_link(), - dladm_show_vnic(), - dladm_show_linkprop(), - ] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await -} - #[cfg(test)] mod test { use super::*; @@ -246,7 +222,7 @@ mod test { match execute_command_with_timeout(command, Duration::from_millis(500)) .await { - Err(SupportBundleCmdError::Timeout { .. }) => (), + Err(SledDiagnosticsCmdError::Timeout { .. }) => (), _ => panic!("command should have timed out"), } } @@ -267,7 +243,7 @@ mod test { #[tokio::test] async fn test_command_stderr_is_correct() { let mut command = Command::new("bash"); - command.env_clear().args(&["-c", "echo oxide computer > /dev/stderr"]); + command.env_clear().args(["-c", "echo oxide computer > /dev/stderr"]); let res = execute_command_with_timeout(command, Duration::from_secs(5)) .await @@ -279,7 +255,7 @@ mod test { #[tokio::test] async fn test_command_stdout_stderr_are_interleaved() { let mut command = Command::new("bash"); - command.env_clear().args(&[ + command.env_clear().args([ "-c", "echo one > /dev/stdout \ && echo two > /dev/stderr \