diff --git a/common/src/lib.rs b/common/src/lib.rs index dd4b633de9..de9eef9fd2 100644 --- a/common/src/lib.rs +++ b/common/src/lib.rs @@ -61,44 +61,6 @@ impl slog::KV for FileKv { pub const OMICRON_DPD_TAG: &str = "omicron"; -use crate::api::external::Error; -use crate::progenitor_operation_retry::ProgenitorOperationRetry; -use crate::progenitor_operation_retry::ProgenitorOperationRetryError; -use std::future::Future; - -/// Retry a progenitor client operation until a known result is returned. -/// -/// See [`ProgenitorOperationRetry`] for more information. -// TODO mark this deprecated, `never_bail` is a bad idea -pub async fn retry_until_known_result( - log: &slog::Logger, - f: F, -) -> Result> -where - F: FnMut() -> Fut, - Fut: Future>>, - E: std::fmt::Debug, -{ - match ProgenitorOperationRetry::new(f, never_bail).run(log).await { - Ok(v) => Ok(v), - - Err(e) => match e { - ProgenitorOperationRetryError::ProgenitorError(e) => Err(e), - - ProgenitorOperationRetryError::Gone - | ProgenitorOperationRetryError::GoneCheckError(_) => { - // ProgenitorOperationRetry::new called with `never_bail` as the - // bail check should never return these variants! - unreachable!(); - } - }, - } -} - -async fn never_bail() -> Result { - Ok(false) -} - /// A wrapper struct that does nothing other than elide the inner value from /// [`std::fmt::Debug`] output. /// diff --git a/nexus/db-queries/src/db/datastore/physical_disk.rs b/nexus/db-queries/src/db/datastore/physical_disk.rs index 29c210e5d6..52c93239aa 100644 --- a/nexus/db-queries/src/db/datastore/physical_disk.rs +++ b/nexus/db-queries/src/db/datastore/physical_disk.rs @@ -38,6 +38,7 @@ use omicron_common::api::external::ResourceType; use omicron_uuid_kinds::CollectionUuid; use omicron_uuid_kinds::GenericUuid; use omicron_uuid_kinds::PhysicalDiskUuid; +use omicron_uuid_kinds::SledUuid; use uuid::Uuid; impl DataStore { @@ -66,7 +67,7 @@ impl DataStore { // expunged. Self::check_sled_in_service_on_connection( &conn, - disk.sled_id, + SledUuid::from_untyped_uuid(disk.sled_id), ) .await .map_err(|txn_error| txn_error.into_diesel(&err))?; @@ -336,7 +337,6 @@ mod test { use omicron_common::api::external::ByteCount; use omicron_common::disk::{DiskIdentity, DiskVariant}; use omicron_test_utils::dev; - use omicron_uuid_kinds::SledUuid; use std::net::{Ipv6Addr, SocketAddrV6}; use std::num::NonZeroU32; diff --git a/nexus/db-queries/src/db/datastore/sled.rs b/nexus/db-queries/src/db/datastore/sled.rs index 89492390d4..7cd05d8272 100644 --- a/nexus/db-queries/src/db/datastore/sled.rs +++ b/nexus/db-queries/src/db/datastore/sled.rs @@ -39,6 +39,8 @@ use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::ResourceType; use omicron_common::bail_unless; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::SledUuid; use std::fmt; use strum::IntoEnumIterator; use thiserror::Error; @@ -99,18 +101,30 @@ impl DataStore { Ok((sled, was_modified)) } + /// Confirms that a sled exists and is in-service. + pub async fn check_sled_in_service( + &self, + opctx: &OpContext, + sled_id: SledUuid, + ) -> Result<(), Error> { + let conn = &*self.pool_connection_authorized(&opctx).await?; + Self::check_sled_in_service_on_connection(conn, sled_id) + .await + .map_err(From::from) + } + /// Confirms that a sled exists and is in-service. /// /// This function may be called from a transaction context. pub async fn check_sled_in_service_on_connection( conn: &async_bb8_diesel::Connection, - sled_id: Uuid, + sled_id: SledUuid, ) -> Result<(), TransactionError> { use db::schema::sled::dsl; let sled_exists_and_in_service = diesel::select(diesel::dsl::exists( dsl::sled .filter(dsl::time_deleted.is_null()) - .filter(dsl::id.eq(sled_id)) + .filter(dsl::id.eq(sled_id.into_untyped_uuid())) .sled_filter(SledFilter::InService), )) .get_result_async::(conn) diff --git a/nexus/src/app/sagas/snapshot_create.rs b/nexus/src/app/sagas/snapshot_create.rs index 7696296ce1..41f97237c9 100644 --- a/nexus/src/app/sagas/snapshot_create.rs +++ b/nexus/src/app/sagas/snapshot_create.rs @@ -103,14 +103,11 @@ use anyhow::anyhow; use nexus_db_model::Generation; use nexus_db_queries::db::identity::{Asset, Resource}; use nexus_db_queries::db::lookup::LookupPath; -use omicron_common::retry_until_known_result; +use omicron_common::api::external::Error; +use omicron_common::progenitor_operation_retry::ProgenitorOperationRetryError; use omicron_common::{ api::external, progenitor_operation_retry::ProgenitorOperationRetry, }; -use omicron_common::{ - api::external::Error, - progenitor_operation_retry::ProgenitorOperationRetryError, -}; use omicron_uuid_kinds::{GenericUuid, PropolisUuid, SledUuid}; use rand::{rngs::StdRng, RngCore, SeedableRng}; use serde::Deserialize; @@ -851,6 +848,7 @@ async fn ssc_send_snapshot_request_to_sled_agent( "instance no longer has an active VMM!", ))); }; + let sled_id = SledUuid::from_untyped_uuid(sled_id); info!(log, "asking for disk snapshot from Propolis via sled agent"; "disk_id" => %params.disk_id, @@ -860,11 +858,11 @@ async fn ssc_send_snapshot_request_to_sled_agent( let sled_agent_client = osagactx .nexus() - .sled_client(&SledUuid::from_untyped_uuid(sled_id)) + .sled_client(&sled_id) .await .map_err(ActionError::action_failed)?; - retry_until_known_result(log, || async { + let snapshot_operation = || async { sled_agent_client .vmm_issue_disk_snapshot_request( &PropolisUuid::from_untyped_uuid(propolis_id), @@ -872,10 +870,23 @@ async fn ssc_send_snapshot_request_to_sled_agent( &VmmIssueDiskSnapshotRequestBody { snapshot_id }, ) .await - }) - .await - .map_err(|e| e.to_string()) - .map_err(ActionError::action_failed)?; + }; + let gone_check = || async { + osagactx.datastore().check_sled_in_service(&opctx, sled_id).await?; + // `check_sled_in_service` returns an error if the sled is no longer in + // service; if it succeeds, the sled is not gone. + Ok(false) + }; + + ProgenitorOperationRetry::new(snapshot_operation, gone_check) + .run(log) + .await + .map_err(|e| { + ActionError::action_failed(format!( + "failed to issue VMM disk snapshot request: {}", + InlineErrorChain::new(&e) + )) + })?; Ok(()) }