From 06f804575ce440495a6998d51f3d01863b1f5edd Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 13 Dec 2024 12:52:15 -0800 Subject: [PATCH 01/10] Create dtrace probes for transactions (#7248) - Adds a "NonRetryHelper" struct to help instrument non-retryable transactions - Adds `transaction__start` and `transaction__done` probes which wrap our retryable (and now, non-retryable transactions) Intended to supplement https://github.com/oxidecomputer/omicron/pull/7244 , and provide transaction names --- Cargo.lock | 4 +- Cargo.toml | 2 +- clippy.toml | 2 +- .../db-queries/src/db/datastore/deployment.rs | 7 +- nexus/db-queries/src/db/datastore/dns.rs | 31 +++---- .../db-queries/src/db/datastore/inventory.rs | 6 +- nexus/db-queries/src/db/datastore/mod.rs | 8 ++ nexus/db-queries/src/db/datastore/rack.rs | 11 +-- nexus/db-queries/src/db/datastore/role.rs | 8 +- nexus/db-queries/src/db/datastore/silo.rs | 80 +++++++++---------- .../db-queries/src/db/datastore/silo_group.rs | 8 +- nexus/db-queries/src/lib.rs | 6 ++ nexus/db-queries/src/transaction_retry.rs | 48 +++++++++++ 13 files changed, 140 insertions(+), 81 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 65ac0fd91c..04cea43c75 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2321,9 +2321,9 @@ dependencies = [ [[package]] name = "diesel-dtrace" -version = "0.4.0" +version = "0.4.2" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "4e5130181059723aae1cfdb678d3698052a225aaadb18000f77fec4200047acc" +checksum = "2750c8bd7a42381620b57f370ca5f757a71d554814e02a43c1bf54ae2656e01d" dependencies = [ "diesel", "serde", diff --git a/Cargo.toml b/Cargo.toml index ec862fb722..39d8f33ca6 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -361,7 +361,7 @@ derive_more = "0.99.18" derive-where = "1.2.7" # Having the i-implement-... feature here makes diesel go away from the workspace-hack diesel = { version = "2.2.4", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } -diesel-dtrace = "0.4.0" +diesel-dtrace = "0.4.2" dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } diff --git a/clippy.toml b/clippy.toml index 31e28d5911..677fb67ade 100644 --- a/clippy.toml +++ b/clippy.toml @@ -15,5 +15,5 @@ disallowed-methods = [ # and can fail spuriously. # Instead, the "transaction_retry_wrapper" should be preferred, as it # automatically retries transactions experiencing contention. - { path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. Feel free to override this for tests and nested transactions." }, + { path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. 
For tests and nested transactions, use transaction_non_retry_wrapper to at least get dtrace probes" }, ] diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 0c73ae1ae2..4210f2bee2 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -16,7 +16,6 @@ use crate::db::DbConnection; use crate::db::TransactionError; use crate::transaction_retry::OptionalError; use anyhow::Context; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::DateTime; use chrono::Utc; @@ -106,7 +105,7 @@ impl DataStore { blueprint: &Blueprint, ) -> Result<(), Error> { let conn = self.pool_connection_authorized(opctx).await?; - Self::blueprint_insert_on_connection(&conn, opctx, blueprint).await + self.blueprint_insert_on_connection(&conn, opctx, blueprint).await } /// Creates a transaction iff the current blueprint is "bp_id". @@ -182,6 +181,7 @@ impl DataStore { /// Variant of [Self::blueprint_insert] which may be called from a /// transaction context. pub(crate) async fn blueprint_insert_on_connection( + &self, conn: &async_bb8_diesel::Connection, opctx: &OpContext, blueprint: &Blueprint, @@ -340,7 +340,8 @@ impl DataStore { // as most of the operations should be insertions rather than in-place // modifications of existing tables. #[allow(clippy::disallowed_methods)] - conn.transaction_async(|conn| async move { + self.transaction_non_retry_wrapper("blueprint_insert") + .transaction(&conn, |conn| async move { // Insert the row for the blueprint. { use db::schema::blueprint::dsl; diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index 3f0f7828fa..114d553aac 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -20,7 +20,6 @@ use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::TransactionError; use crate::transaction_retry::OptionalError; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use futures::future::BoxFuture; @@ -453,20 +452,24 @@ impl DataStore { // This method is used in nested transactions, which are not supported // with retryable transactions. - #[allow(clippy::disallowed_methods)] - conn.transaction_async(|c| async move { - let version = self - .dns_group_latest_version_conn(opctx, conn, update.dns_group) - .await?; - self.dns_write_version_internal( - &c, - update, - zones, - Generation(version.version.next()), - ) + self.transaction_non_retry_wrapper("dns_update_incremental") + .transaction(&conn, |c| async move { + let version = self + .dns_group_latest_version_conn( + opctx, + conn, + update.dns_group, + ) + .await?; + self.dns_write_version_internal( + &c, + update, + zones, + Generation(version.version.next()), + ) + .await + }) .await - }) - .await } // This must only be used inside a transaction. Otherwise, it may make diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index 9269b233f3..08c64a3254 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -278,13 +278,13 @@ impl DataStore { // batch rather than making a bunch of round-trips to the database. // We'd do that if we had an interface for doing that with bound // parameters, etc. See oxidecomputer/omicron#973. 
- let pool = self.pool_connection_authorized(opctx).await?; + let conn = self.pool_connection_authorized(opctx).await?; // The risk of a serialization error is possible here, but low, // as most of the operations should be insertions rather than in-place // modifications of existing tables. - #[allow(clippy::disallowed_methods)] - pool.transaction_async(|conn| async move { + self.transaction_non_retry_wrapper("inventory_insert_collection") + .transaction(&conn, |conn| async move { // Insert records (and generate ids) for any baseboards that do not // already exist in the database. These rows are not scoped to a // particular collection. They contain only immutable data -- diff --git a/nexus/db-queries/src/db/datastore/mod.rs b/nexus/db-queries/src/db/datastore/mod.rs index 25f1f282f7..c90f1cc92e 100644 --- a/nexus/db-queries/src/db/datastore/mod.rs +++ b/nexus/db-queries/src/db/datastore/mod.rs @@ -320,6 +320,14 @@ impl DataStore { ) } + /// Constructs a non-retryable transaction helper + pub fn transaction_non_retry_wrapper( + &self, + name: &'static str, + ) -> crate::transaction_retry::NonRetryHelper { + crate::transaction_retry::NonRetryHelper::new(&self.log, name) + } + #[cfg(test)] pub(crate) fn transaction_retry_producer( &self, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index dc3175c22d..067b8ed3ce 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -26,7 +26,6 @@ use crate::db::model::Zpool; use crate::db::pagination::paginated; use crate::db::pool::DbConnection; use crate::db::TransactionError; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -676,11 +675,9 @@ impl DataStore { // This method uses nested transactions, which are not supported // with retryable transactions. - #[allow(clippy::disallowed_methods)] - let rack = self - .pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| { + let conn = self.pool_connection_authorized(opctx).await?; + let rack = self.transaction_non_retry_wrapper("rack_set_initialized") + .transaction(&conn, |conn| { let err = err.clone(); let log = log.clone(); let authz_service_pool = authz_service_pool.clone(); @@ -752,7 +749,7 @@ impl DataStore { } // Insert the RSS-generated blueprint. - Self::blueprint_insert_on_connection( + self.blueprint_insert_on_connection( &conn, opctx, &blueprint, ) .await diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs index ed8ec6fcd9..757658351b 100644 --- a/nexus/db-queries/src/db/datastore/role.rs +++ b/nexus/db-queries/src/db/datastore/role.rs @@ -20,7 +20,6 @@ use crate::db::model::RoleAssignment; use crate::db::model::RoleBuiltin; use crate::db::pagination::paginated_multicolumn; use crate::db::pool::DbConnection; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use nexus_db_fixed_data::role_assignment::BUILTIN_ROLE_ASSIGNMENTS; @@ -213,10 +212,9 @@ impl DataStore { // This method should probably be retryable, but this is slightly // complicated by the cloning semantics of the queries, which // must be Clone to be retried. - #[allow(clippy::disallowed_methods)] - self.pool_connection_authorized(opctx) - .await? 
- .transaction_async(|conn| async move { + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_non_retry_wrapper("role_assignment_replace_visible") + .transaction(&conn, |conn| async move { delete_old_query.execute_async(&conn).await?; Ok(insert_new_query.get_results_async(&conn).await?) }) diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index b862f3c461..c963e13ae4 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -24,7 +24,6 @@ use crate::db::model::VirtualProvisioningCollection; use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -227,9 +226,9 @@ impl DataStore { // This method uses nested transactions, which are not supported // with retryable transactions. - #[allow(clippy::disallowed_methods)] - let silo = conn - .transaction_async(|conn| async move { + let silo = self + .transaction_non_retry_wrapper("silo_create") + .transaction(&conn, |conn| async move { let silo = silo_create_query .get_result_async(&conn) .await @@ -429,48 +428,49 @@ impl DataStore { // This method uses nested transactions, which are not supported // with retryable transactions. - #[allow(clippy::disallowed_methods)] - conn.transaction_async(|conn| async move { - let updated_rows = diesel::update(silo::dsl::silo) - .filter(silo::dsl::time_deleted.is_null()) - .filter(silo::dsl::id.eq(id)) - .filter(silo::dsl::rcgen.eq(rcgen)) - .set(silo::dsl::time_deleted.eq(now)) - .execute_async(&conn) - .await - .map_err(|e| { - public_error_from_diesel( - e, - ErrorHandler::NotFoundByResource(authz_silo), - ) - })?; + self.transaction_non_retry_wrapper("silo_delete") + .transaction(&conn, |conn| async move { + let updated_rows = diesel::update(silo::dsl::silo) + .filter(silo::dsl::time_deleted.is_null()) + .filter(silo::dsl::id.eq(id)) + .filter(silo::dsl::rcgen.eq(rcgen)) + .set(silo::dsl::time_deleted.eq(now)) + .execute_async(&conn) + .await + .map_err(|e| { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByResource(authz_silo), + ) + })?; - if updated_rows == 0 { - return Err(TxnError::CustomError(Error::invalid_request( - "silo deletion failed due to concurrent modification", - ))); - } + if updated_rows == 0 { + return Err(TxnError::CustomError(Error::invalid_request( + "silo deletion failed due to concurrent modification", + ))); + } - self.silo_quotas_delete(opctx, &conn, &authz_silo).await?; + self.silo_quotas_delete(opctx, &conn, &authz_silo).await?; - self.virtual_provisioning_collection_delete_on_connection( - &opctx.log, &conn, id, - ) - .await?; + self.virtual_provisioning_collection_delete_on_connection( + &opctx.log, &conn, id, + ) + .await?; - self.dns_update_incremental(dns_opctx, &conn, dns_update).await?; + self.dns_update_incremental(dns_opctx, &conn, dns_update) + .await?; - info!(opctx.log, "deleted silo {}", id); + info!(opctx.log, "deleted silo {}", id); - Ok(()) - }) - .await - .map_err(|e| match e { - TxnError::CustomError(e) => e, - TxnError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - })?; + Ok(()) + }) + .await + .map_err(|e| match e { + TxnError::CustomError(e) => e, + TxnError::Database(e) => { + public_error_from_diesel(e, ErrorHandler::Server) + } + })?; // TODO-correctness This needs to happen in a saga or some other // mechanism that 
ensures it happens even if we crash at this point. diff --git a/nexus/db-queries/src/db/datastore/silo_group.rs b/nexus/db-queries/src/db/datastore/silo_group.rs index e6168f4e42..4d5543cc9c 100644 --- a/nexus/db-queries/src/db/datastore/silo_group.rs +++ b/nexus/db-queries/src/db/datastore/silo_group.rs @@ -16,7 +16,6 @@ use crate::db::model::SiloGroup; use crate::db::model::SiloGroupMembership; use crate::db::pagination::paginated; use crate::db::IncompleteOnConflictExt; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -198,12 +197,11 @@ impl DataStore { type TxnError = TransactionError; let group_id = authz_silo_group.id(); + let conn = self.pool_connection_authorized(opctx).await?; // Prefer to use "transaction_retry_wrapper" - #[allow(clippy::disallowed_methods)] - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { + self.transaction_non_retry_wrapper("silo_group_delete") + .transaction(&conn, |conn| async move { use db::schema::silo_group_membership; // Don't delete groups that still have memberships diff --git a/nexus/db-queries/src/lib.rs b/nexus/db-queries/src/lib.rs index 003310f920..c02c781c8e 100644 --- a/nexus/db-queries/src/lib.rs +++ b/nexus/db-queries/src/lib.rs @@ -38,4 +38,10 @@ mod probes { // Fires when we fail to find a VNI in the provided range. fn vni__search__range__empty(_: &usdt::UniqueId) {} + + // Fires when a transaction has started + fn transaction__start(conn_id: uuid::Uuid, name: &str) {} + + // Fires when a transaction has completed + fn transaction__done(conn_id: uuid::Uuid, name: &str) {} } diff --git a/nexus/db-queries/src/transaction_retry.rs b/nexus/db-queries/src/transaction_retry.rs index 02d00f8215..cf8ee22376 100644 --- a/nexus/db-queries/src/transaction_retry.rs +++ b/nexus/db-queries/src/transaction_retry.rs @@ -106,9 +106,15 @@ impl RetryHelper { + Send + Sync, { + crate::probes::transaction__start!(|| { + (conn.as_sync_conn().id(), self.name) + }); let result = conn .transaction_async_with_retry(f, || self.retry_callback()) .await; + crate::probes::transaction__done!(|| { + (conn.as_sync_conn().id(), self.name) + }); let retry_info = self.inner.lock().unwrap(); if retry_info.has_retried() { @@ -179,6 +185,48 @@ impl oximeter::Producer for Producer { } } +/// Helper utility to have a similar interface to RetryHelper (also emitting +/// probes) but for non-retryable transactions. +pub struct NonRetryHelper { + log: Logger, + name: &'static str, +} + +impl NonRetryHelper { + pub(crate) fn new(log: &Logger, name: &'static str) -> Self { + Self { log: log.new(o!("transaction" => name)), name } + } + + /// Calls the function "f" in an asynchronous, non-retryable transaction. + pub async fn transaction( + self, + conn: &async_bb8_diesel::Connection, + f: Func, + ) -> Result + where + R: Send + 'static, + E: From + std::fmt::Debug + Send + 'static, + Fut: std::future::Future> + Send, + Func: FnOnce(async_bb8_diesel::Connection) -> Fut + + Send + + Sync, + { + crate::probes::transaction__start!(|| { + (conn.as_sync_conn().id(), self.name) + }); + + #[allow(clippy::disallowed_methods)] + let result = conn.transaction_async(f).await; + crate::probes::transaction__done!(|| { + (conn.as_sync_conn().id(), self.name) + }); + if let Err(err) = result.as_ref() { + warn!(self.log, "Non-retryable transaction failure"; "err" => ?err); + } + result + } +} + /// Helper utility for passing non-retryable errors out-of-band from /// transactions. 
/// From 5ef0b7d93abedec94ddbc4cc60ea562017e7f262 Mon Sep 17 00:00:00 2001 From: Michael Zeller Date: Fri, 13 Dec 2024 17:45:44 -0500 Subject: [PATCH 02/10] Pull support-bundle/queries into a standalone crate (#7193) This pulls out sled-agent/support-bundle/queries from Omicron into a standalone sled-diagnostics crate that can be reused outside of sled-agent. --- Cargo.lock | 11 ++ Cargo.toml | 3 + sled-agent/Cargo.toml | 1 + sled-agent/src/http_entrypoints.rs | 2 +- sled-agent/src/sled_agent.rs | 17 +-- sled-agent/src/support_bundle/mod.rs | 1 - sled-diagnostics/.gitignore | 1 + sled-diagnostics/Cargo.toml | 13 ++ sled-diagnostics/src/lib.rs | 52 ++++++++ .../src}/queries.rs | 120 +++++++----------- 10 files changed, 137 insertions(+), 84 deletions(-) create mode 100644 sled-diagnostics/.gitignore create mode 100644 sled-diagnostics/Cargo.toml create mode 100644 sled-diagnostics/src/lib.rs rename {sled-agent/src/support_bundle => sled-diagnostics/src}/queries.rs (69%) diff --git a/Cargo.lock b/Cargo.lock index 04cea43c75..f53a518f76 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7265,6 +7265,7 @@ dependencies = [ "sled-agent-api", "sled-agent-client", "sled-agent-types", + "sled-diagnostics", "sled-hardware", "sled-hardware-types", "sled-storage", @@ -10734,6 +10735,16 @@ dependencies = [ "uuid", ] +[[package]] +name = "sled-diagnostics" +version = "0.1.0" +dependencies = [ + "futures", + "omicron-workspace-hack", + "thiserror 1.0.69", + "tokio", +] + [[package]] name = "sled-hardware" version = "0.1.0" diff --git a/Cargo.toml b/Cargo.toml index 39d8f33ca6..bb9ff0e80f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -108,6 +108,7 @@ members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -240,6 +241,7 @@ default-members = [ "sled-agent/bootstrap-agent-api", "sled-agent/repo-depot-api", "sled-agent/types", + "sled-diagnostics", "sled-hardware", "sled-hardware/types", "sled-storage", @@ -596,6 +598,7 @@ sled = "=0.34.7" sled-agent-api = { path = "sled-agent/api" } sled-agent-client = { path = "clients/sled-agent-client" } sled-agent-types = { path = "sled-agent/types" } +sled-diagnostics = { path = "sled-diagnostics" } sled-hardware = { path = "sled-hardware" } sled-hardware-types = { path = "sled-hardware/types" } sled-storage = { path = "sled-storage" } diff --git a/sled-agent/Cargo.toml b/sled-agent/Cargo.toml index 10b4ba1cdb..88c758dc31 100644 --- a/sled-agent/Cargo.toml +++ b/sled-agent/Cargo.toml @@ -81,6 +81,7 @@ sha3.workspace = true sled-agent-api.workspace = true sled-agent-client.workspace = true sled-agent-types.workspace = true +sled-diagnostics.workspace = true sled-hardware.workspace = true sled-hardware-types.workspace = true sled-storage.workspace = true diff --git a/sled-agent/src/http_entrypoints.rs b/sled-agent/src/http_entrypoints.rs index 844e13151a..88259537d2 100644 --- a/sled-agent/src/http_entrypoints.rs +++ b/sled-agent/src/http_entrypoints.rs @@ -6,7 +6,6 @@ use super::sled_agent::SledAgent; use crate::sled_agent::Error as SledAgentError; -use crate::support_bundle::queries::SupportBundleCommandHttpOutput; use crate::zone_bundle::BundleError; use bootstore::schemes::v0::NetworkConfig; use camino::Utf8PathBuf; @@ -53,6 +52,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, StorageLimit, ZoneBundleId, ZoneBundleMetadata, }; +use sled_diagnostics::SledDiagnosticsCommandHttpOutput; use 
std::collections::BTreeMap; type SledApiDescription = ApiDescription; diff --git a/sled-agent/src/sled_agent.rs b/sled-agent/src/sled_agent.rs index 80dbe72ea3..b9bf703933 100644 --- a/sled-agent/src/sled_agent.rs +++ b/sled-agent/src/sled_agent.rs @@ -19,10 +19,6 @@ use crate::params::OmicronZoneTypeExt; use crate::probe_manager::ProbeManager; use crate::services::{self, ServiceManager}; use crate::storage_monitor::StorageMonitorHandle; -use crate::support_bundle::queries::{ - dladm_info, ipadm_info, zoneadm_info, SupportBundleCmdError, - SupportBundleCmdOutput, -}; use crate::support_bundle::storage::SupportBundleManager; use crate::updates::{ConfigUpdates, UpdateManager}; use crate::vmm_reservoir::{ReservoirMode, VmmReservoirManager}; @@ -76,6 +72,7 @@ use sled_agent_types::zone_bundle::{ BundleUtilization, CleanupContext, CleanupCount, CleanupPeriod, PriorityOrder, StorageLimit, ZoneBundleMetadata, }; +use sled_diagnostics::{SledDiagnosticsCmdError, SledDiagnosticsCmdOutput}; use sled_hardware::{underlay, HardwareManager}; use sled_hardware_types::underlay::BootstrapInterface; use sled_hardware_types::Baseboard; @@ -1367,20 +1364,20 @@ impl SledAgent { pub(crate) async fn support_zoneadm_info( &self, - ) -> Result { - zoneadm_info().await + ) -> Result { + sled_diagnostics::zoneadm_info().await } pub(crate) async fn support_ipadm_info( &self, - ) -> Vec> { - ipadm_info().await + ) -> Vec> { + sled_diagnostics::ipadm_info().await } pub(crate) async fn support_dladm_info( &self, - ) -> Vec> { - dladm_info().await + ) -> Vec> { + sled_diagnostics::dladm_info().await } } diff --git a/sled-agent/src/support_bundle/mod.rs b/sled-agent/src/support_bundle/mod.rs index 314edfaec8..a1c4942751 100644 --- a/sled-agent/src/support_bundle/mod.rs +++ b/sled-agent/src/support_bundle/mod.rs @@ -2,5 +2,4 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. -pub mod queries; pub mod storage; diff --git a/sled-diagnostics/.gitignore b/sled-diagnostics/.gitignore new file mode 100644 index 0000000000..ea8c4bf7f3 --- /dev/null +++ b/sled-diagnostics/.gitignore @@ -0,0 +1 @@ +/target diff --git a/sled-diagnostics/Cargo.toml b/sled-diagnostics/Cargo.toml new file mode 100644 index 0000000000..98ac59b674 --- /dev/null +++ b/sled-diagnostics/Cargo.toml @@ -0,0 +1,13 @@ +[package] +name = "sled-diagnostics" +version = "0.1.0" +edition = "2021" + +[lints] +workspace = true + +[dependencies] +futures.workspace = true +omicron-workspace-hack.workspace = true +thiserror.workspace = true +tokio = { workspace = true, features = ["full"] } diff --git a/sled-diagnostics/src/lib.rs b/sled-diagnostics/src/lib.rs new file mode 100644 index 0000000000..466c1ec446 --- /dev/null +++ b/sled-diagnostics/src/lib.rs @@ -0,0 +1,52 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Diagnostics for an Oxide sled that exposes common support commands. + +use futures::{stream::FuturesUnordered, StreamExt}; + +mod queries; +pub use crate::queries::{ + SledDiagnosticsCmdError, SledDiagnosticsCmdOutput, + SledDiagnosticsCommandHttpOutput, +}; +use queries::*; + +/// List all zones on a sled. +pub async fn zoneadm_info( +) -> Result { + execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await +} + +/// Retrieve various `ipadm` command output for the system. 
+pub async fn ipadm_info( +) -> Vec> { + [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} + +/// Retrieve various `dladm` command output for the system. +pub async fn dladm_info( +) -> Vec> { + [ + dladm_show_phys(), + dladm_show_ether(), + dladm_show_link(), + dladm_show_vnic(), + dladm_show_linkprop(), + ] + .into_iter() + .map(|c| async move { + execute_command_with_timeout(c, DEFAULT_TIMEOUT).await + }) + .collect::>() + .collect::>>() + .await +} diff --git a/sled-agent/src/support_bundle/queries.rs b/sled-diagnostics/src/queries.rs similarity index 69% rename from sled-agent/src/support_bundle/queries.rs rename to sled-diagnostics/src/queries.rs index 2313d9e08d..9a66842cb2 100644 --- a/sled-agent/src/support_bundle/queries.rs +++ b/sled-diagnostics/src/queries.rs @@ -1,18 +1,27 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Wrapper for command execution with timeout. + use std::{process::Command, time::Duration}; -use futures::{stream::FuturesUnordered, StreamExt}; -use illumos_utils::{dladm::DLADM, zone::IPADM, PFEXEC, ZONEADM}; use thiserror::Error; use tokio::io::AsyncReadExt; -const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); +const DLADM: &str = "/usr/sbin/dladm"; +const IPADM: &str = "/usr/sbin/ipadm"; +const PFEXEC: &str = "/usr/bin/pfexec"; +const ZONEADM: &str = "/usr/sbin/zoneadm"; + +pub const DEFAULT_TIMEOUT: Duration = Duration::from_secs(10); -pub trait SupportBundleCommandHttpOutput { +pub trait SledDiagnosticsCommandHttpOutput { fn get_output(self) -> String; } #[derive(Error, Debug)] -pub enum SupportBundleCmdError { +pub enum SledDiagnosticsCmdError { #[error("Failed to duplicate pipe for command [{command}]: {error}")] Dup { command: String, error: std::io::Error }, #[error("Failed to proccess output for command [{command}]: {error}")] @@ -32,13 +41,13 @@ pub enum SupportBundleCmdError { } #[derive(Debug)] -pub struct SupportBundleCmdOutput { +pub struct SledDiagnosticsCmdOutput { pub command: String, pub stdio: String, pub exit_status: String, } -impl std::fmt::Display for SupportBundleCmdOutput { +impl std::fmt::Display for SledDiagnosticsCmdOutput { fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result { writeln!(f, "Command executed [{}]:", self.command)?; writeln!(f, " ==== stdio ====\n{}", self.stdio)?; @@ -46,8 +55,8 @@ impl std::fmt::Display for SupportBundleCmdOutput { } } -impl SupportBundleCommandHttpOutput - for Result +impl SledDiagnosticsCommandHttpOutput + for Result { fn get_output(self) -> String { match self { @@ -76,16 +85,19 @@ fn command_to_string(command: &Command) -> String { /// and stderr as they occur. 
async fn execute( cmd: Command, -) -> Result { +) -> Result { let cmd_string = command_to_string(&cmd); let (sender, mut rx) = tokio::net::unix::pipe::pipe().map_err(|e| { - SupportBundleCmdError::Pipe { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Pipe { command: cmd_string.clone(), error: e } })?; let pipe = sender.into_nonblocking_fd().map_err(|e| { - SupportBundleCmdError::OwnedFd { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::OwnedFd { + command: cmd_string.clone(), + error: e, + } })?; let pipe_dup = pipe.try_clone().map_err(|e| { - SupportBundleCmdError::Dup { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Dup { command: cmd_string.clone(), error: e } })?; // TODO MTZ: We may eventually want to reuse some of the process contract @@ -95,9 +107,8 @@ async fn execute( cmd.stdout(pipe); cmd.stderr(pipe_dup); - let mut child = cmd.spawn().map_err(|e| SupportBundleCmdError::Spawn { - command: cmd_string.clone(), - error: e, + let mut child = cmd.spawn().map_err(|e| { + SledDiagnosticsCmdError::Spawn { command: cmd_string.clone(), error: e } })?; // NB: This drop call is load-bearing and prevents a deadlock. The command // struct holds onto the write half of the pipe preventing the read side @@ -107,85 +118,88 @@ async fn execute( let mut stdio = String::new(); rx.read_to_string(&mut stdio).await.map_err(|e| { - SupportBundleCmdError::Output { command: cmd_string.clone(), error: e } + SledDiagnosticsCmdError::Output { + command: cmd_string.clone(), + error: e, + } })?; let exit_status = child.wait().await.map(|es| format!("{es}")).map_err(|e| { - SupportBundleCmdError::Wait { + SledDiagnosticsCmdError::Wait { command: cmd_string.clone(), error: e, } })?; - Ok(SupportBundleCmdOutput { command: cmd_string, stdio, exit_status }) + Ok(SledDiagnosticsCmdOutput { command: cmd_string, stdio, exit_status }) } /// Spawn a command that's allowed to execute within a given time limit. 
-async fn execute_command_with_timeout( +pub async fn execute_command_with_timeout( command: Command, duration: Duration, -) -> Result { +) -> Result { let cmd_string = command_to_string(&command); let tokio_command = execute(command); match tokio::time::timeout(duration, tokio_command).await { Ok(res) => res, - Err(_elapsed) => Err(SupportBundleCmdError::Timeout { + Err(_elapsed) => Err(SledDiagnosticsCmdError::Timeout { command: cmd_string, duration, }), } } -fn zoneadm_list() -> Command { +pub fn zoneadm_list() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(ZONEADM).arg("list").arg("-cip"); cmd } -fn ipadm_show_interface() -> Command { +pub fn ipadm_show_interface() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-if"); cmd } -fn ipadm_show_addr() -> Command { +pub fn ipadm_show_addr() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-addr"); cmd } -fn ipadm_show_prop() -> Command { +pub fn ipadm_show_prop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(IPADM).arg("show-prop"); cmd } -fn dladm_show_phys() -> Command { +pub fn dladm_show_phys() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).args(["show-phys", "-m"]); cmd } -fn dladm_show_ether() -> Command { +pub fn dladm_show_ether() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-ether"); cmd } -fn dladm_show_link() -> Command { +pub fn dladm_show_link() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-link"); cmd } -fn dladm_show_vnic() -> Command { +pub fn dladm_show_vnic() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-vnic"); cmd } -fn dladm_show_linkprop() -> Command { +pub fn dladm_show_linkprop() -> Command { let mut cmd = std::process::Command::new(PFEXEC); cmd.env_clear().arg(DLADM).arg("show-linkprop"); cmd @@ -195,44 +209,6 @@ fn dladm_show_linkprop() -> Command { * Public API */ -/// List all zones on a sled. -pub async fn zoneadm_info( -) -> Result { - execute_command_with_timeout(zoneadm_list(), DEFAULT_TIMEOUT).await -} - -/// Retrieve various `ipadm` command output for the system. -pub async fn ipadm_info( -) -> Vec> { - [ipadm_show_interface(), ipadm_show_addr(), ipadm_show_prop()] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await -} - -/// Retrieve various `dladm` command output for the system. -pub async fn dladm_info( -) -> Vec> { - [ - dladm_show_phys(), - dladm_show_ether(), - dladm_show_link(), - dladm_show_vnic(), - dladm_show_linkprop(), - ] - .into_iter() - .map(|c| async move { - execute_command_with_timeout(c, DEFAULT_TIMEOUT).await - }) - .collect::>() - .collect::>>() - .await -} - #[cfg(test)] mod test { use super::*; @@ -246,7 +222,7 @@ mod test { match execute_command_with_timeout(command, Duration::from_millis(500)) .await { - Err(SupportBundleCmdError::Timeout { .. }) => (), + Err(SledDiagnosticsCmdError::Timeout { .. 
}) => (), _ => panic!("command should have timed out"), } } @@ -267,7 +243,7 @@ mod test { #[tokio::test] async fn test_command_stderr_is_correct() { let mut command = Command::new("bash"); - command.env_clear().args(&["-c", "echo oxide computer > /dev/stderr"]); + command.env_clear().args(["-c", "echo oxide computer > /dev/stderr"]); let res = execute_command_with_timeout(command, Duration::from_secs(5)) .await @@ -279,7 +255,7 @@ mod test { #[tokio::test] async fn test_command_stdout_stderr_are_interleaved() { let mut command = Command::new("bash"); - command.env_clear().args(&[ + command.env_clear().args([ "-c", "echo one > /dev/stdout \ && echo two > /dev/stderr \ From de8bc93309f9eab8c431328065ea6f3d5bd0f34f Mon Sep 17 00:00:00 2001 From: Sean Klein Date: Fri, 13 Dec 2024 15:14:49 -0800 Subject: [PATCH 03/10] Optimizations to improve dataset ensuring performance (#7236) Major part of https://github.com/oxidecomputer/omicron/issues/7217 Optimizations made: - `zfs get` is queried, so properties are samples for all datasets of interest ahead-of-time. No subsequent processes are exec'd for each dataset that needs no changes. - The "dataset ensure" process is now concurrent These optimizations should significantly improve the latency of the "no (or few) changes necessary" cases. Optimizations still left to be made: - Making blueprint execution concurrent, rather than blocking one-sled-at-a-time - Patching the illumos_utils "Zfs::ensure_filesystem" to re-use the pre-fetched properties, and minimize re-querying --- Cargo.lock | 2 + illumos-utils/Cargo.toml | 1 + illumos-utils/src/zfs.rs | 239 +++++++++--------- sled-agent/src/backing_fs.rs | 3 +- sled-agent/src/bootstrap/pre_server.rs | 1 + sled-storage/Cargo.toml | 1 + sled-storage/src/dataset.rs | 2 + sled-storage/src/error.rs | 3 + sled-storage/src/manager.rs | 320 ++++++++++++++++++++----- 9 files changed, 395 insertions(+), 177 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index f53a518f76..c9fc792f72 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4464,6 +4464,7 @@ dependencies = [ "futures", "http", "ipnetwork", + "itertools 0.13.0", "libc", "macaddr", "mockall", @@ -10816,6 +10817,7 @@ dependencies = [ "slog", "thiserror 1.0.69", "tokio", + "tokio-stream", "uuid", ] diff --git a/illumos-utils/Cargo.toml b/illumos-utils/Cargo.toml index e1421bd3ab..69276713bb 100644 --- a/illumos-utils/Cargo.toml +++ b/illumos-utils/Cargo.toml @@ -21,6 +21,7 @@ dropshot.workspace = true futures.workspace = true http.workspace = true ipnetwork.workspace = true +itertools.workspace = true libc.workspace = true macaddr.workspace = true omicron-common.workspace = true diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index f9edb8de86..ee1ac58be9 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -9,9 +9,11 @@ use anyhow::anyhow; use anyhow::bail; use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; +use itertools::Itertools; use omicron_common::api::external::ByteCount; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DiskIdentity; +use omicron_common::disk::SharedDatasetConfig; use omicron_uuid_kinds::DatasetUuid; use std::collections::BTreeMap; use std::fmt; @@ -82,25 +84,19 @@ enum EnsureFilesystemErrorRaw { /// Error returned by [`Zfs::ensure_filesystem`]. 
#[derive(thiserror::Error, Debug)] -#[error( - "Failed to ensure filesystem '{name}' exists at '{mountpoint:?}': {err}" -)] +#[error("Failed to ensure filesystem '{name}': {err}")] pub struct EnsureFilesystemError { name: String, - mountpoint: Mountpoint, #[source] err: EnsureFilesystemErrorRaw, } /// Error returned by [`Zfs::set_oxide_value`] #[derive(thiserror::Error, Debug)] -#[error( - "Failed to set value '{name}={value}' on filesystem {filesystem}: {err}" -)] +#[error("Failed to set values '{values}' on filesystem {filesystem}: {err}")] pub struct SetValueError { filesystem: String, - name: String, - value: String, + values: String, err: crate::ExecutionError, } @@ -242,11 +238,27 @@ impl DatasetProperties { "oxide:uuid,name,avail,used,quota,reservation,compression"; } +impl TryFrom<&DatasetProperties> for SharedDatasetConfig { + type Error = anyhow::Error; + + fn try_from( + props: &DatasetProperties, + ) -> Result { + Ok(SharedDatasetConfig { + compression: props.compression.parse()?, + quota: props.quota, + reservation: props.reservation, + }) + } +} + impl DatasetProperties { /// Parses dataset properties, assuming that the caller is providing the /// output of the following command as stdout: /// - /// zfs get -rpo name,property,value,source $ZFS_GET_PROPS $DATASETS + /// zfs get \ + /// [maybe depth arguments] \ + /// -Hpo name,property,value,source $ZFS_GET_PROPS $DATASETS fn parse_many( stdout: &str, ) -> Result, anyhow::Error> { @@ -307,14 +319,16 @@ impl DatasetProperties { .parse::() .context("Failed to parse 'used'")? .try_into()?; + + // The values of "quota" and "reservation" can be either "-" or + // "0" when they are not actually set. To be cautious, we treat + // both of these values as "the value has not been set + // explicitly". As a result, setting either of these values + // explicitly to zero is indistinguishable from setting them + // with a value of "none". let quota = props .get("quota") - .filter(|(_prop, source)| { - // If a quota has not been set explicitly, it has a default - // source and a value of "zero". Rather than parsing the value - // as zero, it should be ignored. - *source != "default" - }) + .filter(|(prop, _source)| *prop != "-" && *prop != "0") .map(|(prop, _source)| { prop.parse::().context("Failed to parse 'quota'") }) @@ -322,12 +336,7 @@ impl DatasetProperties { .and_then(|v| ByteCount::try_from(v).ok()); let reservation = props .get("reservation") - .filter(|(_prop, source)| { - // If a reservation has not been set explicitly, it has a default - // source and a value of "zero". Rather than parsing the value - // as zero, it should be ignored. 
- *source != "default" - }) + .filter(|(prop, _source)| *prop != "-" && *prop != "0") .map(|(prop, _source)| { prop.parse::() .context("Failed to parse 'reservation'") @@ -375,7 +384,40 @@ impl fmt::Display for PropertySource { } } -#[cfg_attr(any(test, feature = "testing"), mockall::automock, allow(dead_code))] +#[derive(Copy, Clone, Debug)] +pub enum WhichDatasets { + SelfOnly, + SelfAndChildren, +} + +fn build_zfs_set_key_value_pairs( + size_details: Option, + dataset_id: Option, +) -> Vec<(&'static str, String)> { + let mut props = Vec::new(); + if let Some(SizeDetails { quota, reservation, compression }) = size_details + { + let quota = quota + .map(|q| q.to_bytes().to_string()) + .unwrap_or_else(|| String::from("none")); + props.push(("quota", quota)); + + let reservation = reservation + .map(|r| r.to_bytes().to_string()) + .unwrap_or_else(|| String::from("none")); + props.push(("reservation", reservation)); + + let compression = compression.to_string(); + props.push(("compression", compression)); + } + + if let Some(id) = dataset_id { + props.push(("oxide:uuid", id.to_string())); + } + + props +} + impl Zfs { /// Lists all datasets within a pool or existing dataset. /// @@ -399,7 +441,9 @@ impl Zfs { } /// Get information about datasets within a list of zpools / datasets. - /// Returns properties for all input datasets and their direct children. + /// Returns properties for all input datasets, and optionally, for + /// their children (depending on the value of [WhichDatasets] is provided + /// as input). /// /// This function is similar to [Zfs::list_datasets], but provides a more /// substantial results about the datasets found. @@ -407,22 +451,30 @@ impl Zfs { /// Sorts results and de-duplicates them by name. pub fn get_dataset_properties( datasets: &[String], + which: WhichDatasets, ) -> Result, anyhow::Error> { let mut command = std::process::Command::new(ZFS); - let cmd = command.args(&[ - "get", - "-d", - "1", - "-Hpo", - "name,property,value,source", - ]); + let cmd = command.arg("get"); + match which { + WhichDatasets::SelfOnly => (), + WhichDatasets::SelfAndChildren => { + cmd.args(&["-d", "1"]); + } + } + cmd.args(&["-Hpo", "name,property,value,source"]); // Note: this is tightly coupled with the layout of DatasetProperties cmd.arg(DatasetProperties::ZFS_GET_PROPS); cmd.args(datasets); - let output = execute(cmd).with_context(|| { - format!("Failed to get dataset properties for {datasets:?}") + // We are intentionally ignoring the output status of this command. + // + // If one or more dataset doesn't exist, we can still read stdout to + // see about the ones that do exist. 
+ let output = cmd.output().map_err(|err| { + anyhow!( + "Failed to get dataset properties for {datasets:?}: {err:?}" + ) })?; let stdout = String::from_utf8(output.stdout)?; @@ -490,23 +542,19 @@ impl Zfs { do_format: bool, encryption_details: Option, size_details: Option, + id: Option, additional_options: Option>, ) -> Result<(), EnsureFilesystemError> { let (exists, mounted) = Self::dataset_exists(name, &mountpoint)?; + + let props = build_zfs_set_key_value_pairs(size_details, id); if exists { - if let Some(SizeDetails { quota, reservation, compression }) = - size_details - { - // apply quota and compression mode (in case they've changed across - // sled-agent versions since creation) - Self::apply_properties( - name, - &mountpoint, - quota, - reservation, - compression, - )?; - } + Self::set_values(name, props.as_slice()).map_err(|err| { + EnsureFilesystemError { + name: name.to_string(), + err: err.err.into(), + } + })?; if encryption_details.is_none() { // If the dataset exists, we're done. Unencrypted datasets are @@ -518,14 +566,13 @@ impl Zfs { return Ok(()); } // We need to load the encryption key and mount the filesystem - return Self::mount_encrypted_dataset(name, &mountpoint); + return Self::mount_encrypted_dataset(name); } } if !do_format { return Err(EnsureFilesystemError { name: name.to_string(), - mountpoint, err: EnsureFilesystemErrorRaw::NotFoundNotFormatted, }); } @@ -561,7 +608,6 @@ impl Zfs { execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: err.into(), })?; @@ -574,82 +620,27 @@ impl Zfs { let cmd = command.args(["chown", "-R", &user, &mount]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: err.into(), })?; } - if let Some(SizeDetails { quota, reservation, compression }) = - size_details - { - // Apply any quota and compression mode. - Self::apply_properties( - name, - &mountpoint, - quota, - reservation, - compression, - )?; - } - - Ok(()) - } - - /// Applies the following properties to the filesystem. - /// - /// If any of the options are not supplied, a default "none" or "off" - /// value is supplied. 
- fn apply_properties( - name: &str, - mountpoint: &Mountpoint, - quota: Option, - reservation: Option, - compression: CompressionAlgorithm, - ) -> Result<(), EnsureFilesystemError> { - let quota = quota - .map(|q| q.to_bytes().to_string()) - .unwrap_or_else(|| String::from("none")); - let reservation = reservation - .map(|r| r.to_bytes().to_string()) - .unwrap_or_else(|| String::from("none")); - let compression = compression.to_string(); - - if let Err(err) = Self::set_value(name, "quota", "a) { - return Err(EnsureFilesystemError { - name: name.to_string(), - mountpoint: mountpoint.clone(), - // Take the execution error from the SetValueError - err: err.err.into(), - }); - } - if let Err(err) = Self::set_value(name, "reservation", &reservation) { - return Err(EnsureFilesystemError { - name: name.to_string(), - mountpoint: mountpoint.clone(), - // Take the execution error from the SetValueError - err: err.err.into(), - }); - } - if let Err(err) = Self::set_value(name, "compression", &compression) { - return Err(EnsureFilesystemError { + Self::set_values(name, props.as_slice()).map_err(|err| { + EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), - // Take the execution error from the SetValueError err: err.err.into(), - }); - } + } + })?; + Ok(()) } fn mount_encrypted_dataset( name: &str, - mountpoint: &Mountpoint, ) -> Result<(), EnsureFilesystemError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-l", name]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::MountEncryptedFsFailed(err), })?; Ok(()) @@ -657,13 +648,11 @@ impl Zfs { pub fn mount_overlay_dataset( name: &str, - mountpoint: &Mountpoint, ) -> Result<(), EnsureFilesystemError> { let mut command = std::process::Command::new(PFEXEC); let cmd = command.args(&[ZFS, "mount", "-O", name]); execute(cmd).map_err(|err| EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::MountOverlayFsFailed(err), })?; Ok(()) @@ -689,7 +678,6 @@ impl Zfs { if &values[..3] != &[name, "filesystem", &mountpoint.to_string()] { return Err(EnsureFilesystemError { name: name.to_string(), - mountpoint: mountpoint.clone(), err: EnsureFilesystemErrorRaw::Output(stdout.to_string()), }); } @@ -714,13 +702,29 @@ impl Zfs { name: &str, value: &str, ) -> Result<(), SetValueError> { + Self::set_values(filesystem_name, &[(name, value)]) + } + + fn set_values( + filesystem_name: &str, + name_values: &[(K, V)], + ) -> Result<(), SetValueError> { + if name_values.is_empty() { + return Ok(()); + } + let mut command = std::process::Command::new(PFEXEC); - let value_arg = format!("{}={}", name, value); - let cmd = command.args(&[ZFS, "set", &value_arg, filesystem_name]); + let cmd = command.args(&[ZFS, "set"]); + for (name, value) in name_values { + cmd.arg(format!("{name}={value}")); + } + cmd.arg(filesystem_name); execute(cmd).map_err(|err| SetValueError { filesystem: filesystem_name.to_string(), - name: name.to_string(), - value: value.to_string(), + values: name_values + .iter() + .map(|(k, v)| format!("{k}={v}")) + .join(","), err, })?; Ok(()) @@ -808,10 +812,7 @@ impl Zfs { err, }) } -} -// These methods don't work with mockall, so they exist in a separate impl block -impl Zfs { /// Calls "zfs get" to acquire multiple values /// /// - `names`: The properties being acquired diff --git a/sled-agent/src/backing_fs.rs b/sled-agent/src/backing_fs.rs 
index 265c0152e2..d24f85ad80 100644 --- a/sled-agent/src/backing_fs.rs +++ b/sled-agent/src/backing_fs.rs @@ -150,6 +150,7 @@ pub(crate) fn ensure_backing_fs( true, // do_format None, // encryption_details, size_details, + None, Some(vec!["canmount=noauto".to_string()]), // options )?; @@ -180,7 +181,7 @@ pub(crate) fn ensure_backing_fs( info!(log, "Mounting {} on {}", dataset, mountpoint); - Zfs::mount_overlay_dataset(&dataset, &mountpoint)?; + Zfs::mount_overlay_dataset(&dataset)?; if let Some(subdirs) = bfs.subdirs { for dir in subdirs { diff --git a/sled-agent/src/bootstrap/pre_server.rs b/sled-agent/src/bootstrap/pre_server.rs index 5b89506242..19a63cb71d 100644 --- a/sled-agent/src/bootstrap/pre_server.rs +++ b/sled-agent/src/bootstrap/pre_server.rs @@ -293,6 +293,7 @@ fn ensure_zfs_ramdisk_dataset() -> Result<(), StartError> { encryption_details, quota, None, + None, ) .map_err(StartError::EnsureZfsRamdiskDataset) } diff --git a/sled-storage/Cargo.toml b/sled-storage/Cargo.toml index 2439c52aa7..d47a89e0ae 100644 --- a/sled-storage/Cargo.toml +++ b/sled-storage/Cargo.toml @@ -28,6 +28,7 @@ sled-hardware.workspace = true slog.workspace = true thiserror.workspace = true tokio.workspace = true +tokio-stream.workspace = true uuid.workspace = true omicron-workspace-hack.workspace = true diff --git a/sled-storage/src/dataset.rs b/sled-storage/src/dataset.rs index e90a4c0092..717595ad9e 100644 --- a/sled-storage/src/dataset.rs +++ b/sled-storage/src/dataset.rs @@ -269,6 +269,7 @@ pub(crate) async fn ensure_zpool_has_datasets( Some(encryption_details), None, None, + None, ); keyfile.zero_and_unlink().await.map_err(|error| { @@ -331,6 +332,7 @@ pub(crate) async fn ensure_zpool_has_datasets( encryption_details, size_details, None, + None, )?; if dataset.wipe { diff --git a/sled-storage/src/error.rs b/sled-storage/src/error.rs index 351dd7f353..f00e35e654 100644 --- a/sled-storage/src/error.rs +++ b/sled-storage/src/error.rs @@ -77,6 +77,9 @@ pub enum Error { #[error("Not ready to manage U.2s (key manager is not ready)")] KeyManagerNotReady, + #[error("Physical disk configuration changed for the same generation number: {generation}")] + PhysicalDiskConfigurationChanged { generation: Generation }, + #[error("Physical disk configuration out-of-date (asked for {requested}, but latest is {current})")] PhysicalDiskConfigurationOutdated { requested: Generation, diff --git a/sled-storage/src/manager.rs b/sled-storage/src/manager.rs index a760285d3f..b1832ca92b 100644 --- a/sled-storage/src/manager.rs +++ b/sled-storage/src/manager.rs @@ -14,7 +14,9 @@ use camino::Utf8Path; use camino::Utf8PathBuf; use debug_ignore::DebugIgnore; use futures::future::FutureExt; -use illumos_utils::zfs::{DatasetProperties, Mountpoint, Zfs}; +use futures::Stream; +use futures::StreamExt; +use illumos_utils::zfs::{DatasetProperties, Mountpoint, WhichDatasets, Zfs}; use illumos_utils::zpool::{ZpoolName, ZPOOL_MOUNTPOINT_ROOT}; use key_manager::StorageKeyRequester; use omicron_common::disk::{ @@ -26,6 +28,7 @@ use omicron_common::ledger::Ledger; use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::GenericUuid; use slog::{error, info, o, warn, Logger}; +use std::collections::BTreeMap; use std::collections::HashSet; use std::future::Future; use tokio::sync::{mpsc, oneshot, watch}; @@ -929,21 +932,107 @@ impl StorageManager { // includes details about all possible errors that may occur on // a per-dataset granularity. 
async fn datasets_ensure_internal( - &mut self, + &self, log: &Logger, config: &DatasetsConfig, ) -> DatasetsManagementResult { - let mut status = vec![]; - for dataset in config.datasets.values() { - status.push(self.dataset_ensure_internal(log, dataset).await); - } + // Gather properties about these datasets, if they exist. + // + // This pre-fetching lets us avoid individually querying them later. + let old_datasets = Zfs::get_dataset_properties( + config + .datasets + .values() + .map(|config| config.name.full_name()) + .collect::>() + .as_slice(), + WhichDatasets::SelfOnly, + ) + .unwrap_or_default() + .into_iter() + .map(|props| (props.name.clone(), props)) + .collect::>(); + + let futures = config.datasets.values().map(|dataset| async { + self.dataset_ensure_internal( + log, + dataset, + old_datasets.get(&dataset.name.full_name()), + ) + .await + }); + + // This "Box::pin" is a workaround for: https://github.com/rust-lang/rust/issues/64552 + // + // Ideally, we would just use: + // + // ``` + // let status: Vec<_> = futures::stream::iter(futures) + // .buffered(...) + // .collect() + // .await; + // ``` + const DATASET_ENSURE_CONCURRENCY_LIMIT: usize = 16; + let results: std::pin::Pin + Send>> = Box::pin( + futures::stream::iter(futures) + .buffered(DATASET_ENSURE_CONCURRENCY_LIMIT), + ); + + let status: Vec = results.collect().await; + DatasetsManagementResult { status } } + fn should_skip_dataset_ensure( + log: &Logger, + config: &DatasetConfig, + old_dataset: Option<&DatasetProperties>, + ) -> Result { + let Some(old_dataset) = old_dataset else { + info!(log, "This dataset did not exist"); + return Ok(false); + }; + + let Some(old_id) = old_dataset.id else { + info!(log, "Old properties missing UUID"); + return Ok(false); + }; + + if old_id != config.id { + return Err(Error::UuidMismatch { + name: config.name.full_name(), + old: old_id.into_untyped_uuid(), + new: config.id.into_untyped_uuid(), + }); + } + + let old_props = match SharedDatasetConfig::try_from(old_dataset) { + Ok(old_props) => old_props, + Err(err) => { + warn!(log, "Failed to parse old properties"; "err" => #%err); + return Ok(false); + } + }; + + info!(log, "Parsed old dataset properties"; "props" => ?old_props); + if old_props != config.inner { + info!( + log, + "Dataset properties changed"; + "old_props" => ?old_props, + "requested_props" => ?config.inner, + ); + return Ok(false); + } + info!(log, "No changes necessary, returning early"); + return Ok(true); + } + async fn dataset_ensure_internal( - &mut self, + &self, log: &Logger, config: &DatasetConfig, + old_dataset: Option<&DatasetProperties>, ) -> DatasetManagementStatus { let log = log.new(o!("name" => config.name.full_name())); info!(log, "Ensuring dataset"); @@ -952,6 +1041,15 @@ impl StorageManager { err: None, }; + match Self::should_skip_dataset_ensure(&log, config, old_dataset) { + Ok(true) => return status, + Ok(false) => (), + Err(err) => { + status.err = Some(err.to_string()); + return status; + } + } + let mountpoint_root = &self.resources.disks().mount_config().root; let mountpoint_path = config.name.mountpoint(mountpoint_root); let details = DatasetCreationDetails { @@ -961,9 +1059,9 @@ impl StorageManager { }; if let Err(err) = self - .ensure_dataset_with_id( + .ensure_dataset( config.name.pool(), - config.id, + Some(config.id), &config.inner, &details, ) @@ -1017,8 +1115,11 @@ impl StorageManager { ]; info!(log, "Listing datasets within zpool"; "zpool" => zpool.to_string()); - Zfs::get_dataset_properties(datasets_of_interest.as_slice()) - 
.map_err(Error::Other) + Zfs::get_dataset_properties( + datasets_of_interest.as_slice(), + WhichDatasets::SelfAndChildren, + ) + .map_err(Error::Other) } // Ensures that a dataset exists, nested somewhere arbitrary within @@ -1039,8 +1140,13 @@ impl StorageManager { full_name: config.name.full_name(), }; - self.ensure_dataset(config.name.root.pool(), &config.inner, &details) - .await?; + self.ensure_dataset( + config.name.root.pool(), + None, + &config.inner, + &details, + ) + .await?; Ok(()) } @@ -1073,15 +1179,18 @@ impl StorageManager { info!(log, "Listing nested datasets"); let full_name = name.full_name(); - let properties = - Zfs::get_dataset_properties(&[full_name]).map_err(|e| { - warn!( - log, - "Failed to access nested dataset"; - "name" => ?name - ); - crate::dataset::DatasetError::Other(e) - })?; + let properties = Zfs::get_dataset_properties( + &[full_name], + WhichDatasets::SelfAndChildren, + ) + .map_err(|e| { + warn!( + log, + "Failed to access nested dataset"; + "name" => ?name + ); + crate::dataset::DatasetError::Other(e) + })?; let root_path = name.root.full_name(); Ok(properties @@ -1159,12 +1268,29 @@ impl StorageManager { requested: config.generation, current: ledger_data.generation, }); - } - - // TODO: If the generation is equal, check that the values are - // also equal. + } else if config.generation == ledger_data.generation { + info!( + log, + "Requested generation number matches prior request", + ); - info!(log, "Request looks newer than prior requests"); + if ledger_data != &config { + error!( + log, + "Requested configuration changed (with the same generation)"; + "generation" => ?config.generation + ); + return Err(Error::PhysicalDiskConfigurationChanged { + generation: config.generation, + }); + } + info!( + log, + "Request looks identical to last request, re-sending" + ); + } else { + info!(log, "Request looks newer than prior requests"); + } ledger } None => { @@ -1291,40 +1417,13 @@ impl StorageManager { } } - // Invokes [Self::ensure_dataset] and also ensures the dataset has an - // expected UUID as a ZFS property. - async fn ensure_dataset_with_id( - &mut self, - zpool: &ZpoolName, - id: DatasetUuid, - config: &SharedDatasetConfig, - details: &DatasetCreationDetails, - ) -> Result<(), Error> { - self.ensure_dataset(zpool, config, details).await?; - - // Ensure the dataset has a usable UUID. - if let Ok(id_str) = Zfs::get_oxide_value(&details.full_name, "uuid") { - if let Ok(found_id) = id_str.parse::() { - if found_id != id { - return Err(Error::UuidMismatch { - name: details.full_name.clone(), - old: found_id.into_untyped_uuid(), - new: id.into_untyped_uuid(), - }); - } - return Ok(()); - } - } - Zfs::set_oxide_value(&details.full_name, "uuid", &id.to_string())?; - Ok(()) - } - // Ensures a dataset exists within a zpool. // // Confirms that the zpool exists and is managed by this sled. async fn ensure_dataset( - &mut self, + &self, zpool: &ZpoolName, + dataset_id: Option, config: &SharedDatasetConfig, details: &DatasetCreationDetails, ) -> Result<(), Error> { @@ -1365,6 +1464,7 @@ impl StorageManager { do_format, encryption_details, size_details, + dataset_id, None, )?; @@ -1401,6 +1501,7 @@ impl StorageManager { do_format, encryption_details, size_details, + Some(DatasetUuid::from_untyped_uuid(request.dataset_id)), None, )?; // Ensure the dataset has a usable UUID. 
@@ -1725,7 +1826,7 @@ mod tests { ); let mut harness = StorageManagerTestHarness::new(&logctx.log).await; - // Queue up a disks, as we haven't told the `StorageManager` that + // Queue up a disk, as we haven't told the `StorageManager` that // the `KeyManager` is ready yet. let raw_disks = harness.add_vdevs(&["u2_under_test.vdev", "m2_helping.vdev"]).await; @@ -1996,6 +2097,111 @@ mod tests { logctx.cleanup_successful(); } + #[tokio::test] + async fn ensure_many_datasets() { + illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); + let logctx = test_setup_log("ensure_many_datasets"); + let mut harness = StorageManagerTestHarness::new(&logctx.log).await; + + // Test setup: Add U.2s and an M.2, adopt them into the "control plane" + // for usage. + harness.handle().key_manager_ready().await; + let raw_disks = harness + .add_vdevs(&[ + "u2_0.vdev", + "u2_1.vdev", + "u2_2.vdev", + "u2_3.vdev", + "u2_4.vdev", + "u2_5.vdev", + "u2_6.vdev", + "u2_7.vdev", + "u2_8.vdev", + "u2_9.vdev", + "m2_helping.vdev", + ]) + .await; + let config = harness.make_config(1, &raw_disks); + + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + let result = harness + .handle() + .omicron_physical_disks_ensure(config.clone()) + .await + .expect("Ensuring disks should work after key manager is ready"); + assert!(!result.has_error(), "{:?}", result); + + // Create datasets on the newly formatted U.2s + let mut datasets = BTreeMap::new(); + for i in 0..10 { + let zpool_name = ZpoolName::new_external(config.disks[i].pool_id); + + let id = DatasetUuid::new_v4(); + let name = + DatasetName::new(zpool_name.clone(), DatasetKind::Crucible); + datasets.insert( + id, + DatasetConfig { + id, + name, + inner: SharedDatasetConfig::default(), + }, + ); + + let id = DatasetUuid::new_v4(); + let name = DatasetName::new(zpool_name.clone(), DatasetKind::Debug); + datasets.insert( + id, + DatasetConfig { + id, + name, + inner: SharedDatasetConfig::default(), + }, + ); + + let id = DatasetUuid::new_v4(); + let name = DatasetName::new( + zpool_name.clone(), + DatasetKind::TransientZoneRoot, + ); + datasets.insert( + id, + DatasetConfig { + id, + name, + inner: SharedDatasetConfig::default(), + }, + ); + } + // "Generation = 1" is reserved as "no requests seen yet", so we jump + // past it. + let generation = Generation::new().next(); + let config = DatasetsConfig { generation, datasets }; + + let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + // List datasets, expect to see what we just created + let observed_config = + harness.handle().datasets_config_list().await.unwrap(); + assert_eq!(config, observed_config); + + // Calling "datasets_ensure" with the same input should succeed. 
+ let status = + harness.handle().datasets_ensure(config.clone()).await.unwrap(); + assert!(!status.has_error()); + + harness.cleanup().await; + logctx.cleanup_successful(); + } + #[tokio::test] async fn nested_dataset() { illumos_utils::USE_MOCKS.store(false, Ordering::SeqCst); From c7663bd36aa28cb17a43acfe97a2f9b9265e48bd Mon Sep 17 00:00:00 2001 From: David Crespo Date: Fri, 13 Dec 2024 19:19:27 -0600 Subject: [PATCH 04/10] Bump web console (gateways UI, SSH key view, fixes) (#7250) https://github.com/oxidecomputer/console/compare/927c8b63...c1ebd8d9 * [c1ebd8d9](https://github.com/oxidecomputer/console/commit/c1ebd8d9) take inventory switches tab back out, DB is empty * [2a139028](https://github.com/oxidecomputer/console/commit/2a139028) tone down empty switches table message * [2b0c3f12](https://github.com/oxidecomputer/console/commit/2b0c3f12) oxidecomputer/console#2621 * [c1fbf631](https://github.com/oxidecomputer/console/commit/c1fbf631) oxidecomputer/console#2622 * [1dd25f63](https://github.com/oxidecomputer/console/commit/1dd25f63) oxidecomputer/console#2615 * [11486f8b](https://github.com/oxidecomputer/console/commit/11486f8b) oxidecomputer/console#2620 * [ada302ce](https://github.com/oxidecomputer/console/commit/ada302ce) oxidecomputer/console#2488 * [bc3161ae](https://github.com/oxidecomputer/console/commit/bc3161ae) minor: remove stub e2e test * [9e1d53c6](https://github.com/oxidecomputer/console/commit/9e1d53c6) sentence case idp form heading * [aaf1154f](https://github.com/oxidecomputer/console/commit/aaf1154f) add extra assert for instance create with additional disks test flake * [79d610dc](https://github.com/oxidecomputer/console/commit/79d610dc) oxidecomputer/console#2618 * [bdbc02b7](https://github.com/oxidecomputer/console/commit/bdbc02b7) oxidecomputer/console#2589 * [7a8ee0ab](https://github.com/oxidecomputer/console/commit/7a8ee0ab) oxidecomputer/console#2614 * [0b5220a1](https://github.com/oxidecomputer/console/commit/0b5220a1) npm audit fix * [0c873cf4](https://github.com/oxidecomputer/console/commit/0c873cf4) oxidecomputer/console#2610 * [d031c8ff](https://github.com/oxidecomputer/console/commit/d031c8ff) bump playwright for navigation hang fix * [dbd8545e](https://github.com/oxidecomputer/console/commit/dbd8545e) oxidecomputer/console#2609 * [dc5562fe](https://github.com/oxidecomputer/console/commit/dc5562fe) move error log to avoid failing to log certain errors --- tools/console_version | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tools/console_version b/tools/console_version index 08078c264e..85ca41f755 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="927c8b63a6f97c230cd8766a80fa1cfef6429eb4" -SHA2="96550b6e485aaee1c6ced00a4a1aeec86267c99fc79a4b2b253141cf0222d346" +COMMIT="c1ebd8d9acae4ff7a09b2517265fba52ebdfe82e" +SHA2="840dbfda1c0def66212e7602d7be6e8acf1b26ba218f10ce3e627df49f5ce9e2" From 0fa1bc2bc447d63c2ed1bc3979a1298f2ae3ce4c Mon Sep 17 00:00:00 2001 From: Rain Date: Fri, 13 Dec 2024 17:29:57 -0800 Subject: [PATCH 05/10] update nextest to 0.9.86 (#7239) Includes some fixes to JUnit support, including representing setup scripts in the output. 
--- .config/nextest.toml | 2 +- .github/buildomat/build-and-test.sh | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/.config/nextest.toml b/.config/nextest.toml index b1699e6717..cc21a442b3 100644 --- a/.config/nextest.toml +++ b/.config/nextest.toml @@ -3,7 +3,7 @@ # # The required version should be bumped up if we need new features, performance # improvements or bugfixes that are present in newer versions of nextest. -nextest-version = { required = "0.9.77", recommended = "0.9.78" } +nextest-version = { required = "0.9.77", recommended = "0.9.86" } experimental = ["setup-scripts"] diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 28c1dbc0e6..605bf91b01 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -18,7 +18,7 @@ target_os=$1 # NOTE: This version should be in sync with the recommended version in # .config/nextest.toml. (Maybe build an automated way to pull the recommended # version in the future.) -NEXTEST_VERSION='0.9.78' +NEXTEST_VERSION='0.9.86' cargo --version rustc --version From 5ceea0f4f16d87294e3043459e11c2d05b51ce6d Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 19:47:02 -0800 Subject: [PATCH 06/10] Check for correct error message in timeseries retry loop (#7253) --- nexus/tests/integration_tests/metrics.rs | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index 7e5441c16a..5e5268fb55 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -399,8 +399,7 @@ pub async fn execute_timeseries_query( if rsp.status.is_client_error() { let text = std::str::from_utf8(&rsp.body) .expect("Timeseries query response body should be UTF-8"); - if text.contains("Schema for timeseries") && text.contains("not found") - { + if text.starts_with("Timeseries not found for: ") { return None; } } From 89d6c764f48589798c0ee552e857f51f83fb94b0 Mon Sep 17 00:00:00 2001 From: Benjamin Naecker Date: Fri, 13 Dec 2024 20:51:57 -0800 Subject: [PATCH 07/10] Add DTrace scripts to Nexus zone (#7244) - Adds a script for tracing all transactions run by Nexus, including their overall latency and the number of statements in each one. - Move all existing scripts to a Nexus subdirectory, and then include that whole directory in the Omicron zone for Nexus itself. 
- Closes #7224 --- Cargo.lock | 3 +- package-manifest.toml | 1 + .../{ => nexus}/aggregate-query-latency.d | 0 tools/dtrace/{ => nexus}/slowest-queries.d | 2 +- tools/dtrace/{ => nexus}/trace-db-queries.d | 0 tools/dtrace/nexus/trace-transactions.d | 67 +++++++++++++++++++ workspace-hack/Cargo.toml | 2 + 7 files changed, 73 insertions(+), 2 deletions(-) rename tools/dtrace/{ => nexus}/aggregate-query-latency.d (100%) rename tools/dtrace/{ => nexus}/slowest-queries.d (99%) rename tools/dtrace/{ => nexus}/trace-db-queries.d (100%) create mode 100755 tools/dtrace/nexus/trace-transactions.d diff --git a/Cargo.lock b/Cargo.lock index c9fc792f72..6a44c7af3d 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -7398,6 +7398,7 @@ dependencies = [ "getrandom", "group", "hashbrown 0.15.1", + "heck 0.4.1", "hex", "hickory-proto", "hmac", @@ -11020,7 +11021,7 @@ version = "0.8.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "03c3c6b7927ffe7ecaa769ee0e3994da3b8cafc8f444578982c83ecb161af917" dependencies = [ - "heck 0.5.0", + "heck 0.4.1", "proc-macro2", "quote", "syn 2.0.87", diff --git a/package-manifest.toml b/package-manifest.toml index 809c1ce6ca..83b1ba8168 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -125,6 +125,7 @@ source.paths = [ { from = "smf/nexus/{{rack-topology}}", to = "/var/svc/manifest/site/nexus" }, { from = "out/console-assets", to = "/var/nexus/static" }, { from = "schema/crdb", to = "/var/nexus/schema/crdb" }, + { from = "tools/dtrace/nexus", to = "/opt/oxide/dtrace/nexus" }, ] output.type = "zone" setup_hint = """ diff --git a/tools/dtrace/aggregate-query-latency.d b/tools/dtrace/nexus/aggregate-query-latency.d similarity index 100% rename from tools/dtrace/aggregate-query-latency.d rename to tools/dtrace/nexus/aggregate-query-latency.d diff --git a/tools/dtrace/slowest-queries.d b/tools/dtrace/nexus/slowest-queries.d similarity index 99% rename from tools/dtrace/slowest-queries.d rename to tools/dtrace/nexus/slowest-queries.d index 76e22de22f..08b40d4d79 100755 --- a/tools/dtrace/slowest-queries.d +++ b/tools/dtrace/nexus/slowest-queries.d @@ -35,7 +35,7 @@ diesel_db$target:::query-done query[this->conn_id] = NULL; } -tick-5s +tick-10s { printf("\n%Y\n", walltimestamp); trunc(@, 5); diff --git a/tools/dtrace/trace-db-queries.d b/tools/dtrace/nexus/trace-db-queries.d similarity index 100% rename from tools/dtrace/trace-db-queries.d rename to tools/dtrace/nexus/trace-db-queries.d diff --git a/tools/dtrace/nexus/trace-transactions.d b/tools/dtrace/nexus/trace-transactions.d new file mode 100755 index 0000000000..baf7818f72 --- /dev/null +++ b/tools/dtrace/nexus/trace-transactions.d @@ -0,0 +1,67 @@ +#!/usr/sbin/dtrace -qs + +/* Trace all transactions to the control plane database with their latency */ + +dtrace:::BEGIN +{ + printf("Tracing all database transactions for nexus PID %d, use Ctrl-C to exit\n", $target); +} + +/* + * Record the start and number of statements for each transaction. + * + * Note that we're using the Nexus-provided transaction start / done probes. + * This lets us associate the other data we might collect (number of statements, + * ustacks, etc) with the Nexus code itself. Although there are transaction + * start / done probes in `diesel-dtrace`, the existing way we run transactions + * with `async-bb8-diesel` involves spawning a future to run the transactions on + * a blocking thread-pool. 
That spawning makes it impossible to associate the + * context in which the `diesel-dtrace` probes fire with the Nexus code that + * actually spawned the transaction itself. + */ +nexus_db_queries$target:::transaction-start +{ + this->key = copyinstr(arg0); + transaction_names[this->key] = copyinstr(arg1); + ts[this->key] = timestamp; + n_statements[this->key] = 0; + printf( + "Started transaction '%s' on conn %s\n", + transaction_names[this->key], + json(this->key, "ok") + ); +} + +/* + * When a query runs in the context of a transaction (on the same connection), + * bump the statement counter. + */ +diesel_db$target:::query-start +/ts[copyinstr(arg1)]/ +{ + n_statements[copyinstr(arg1)] += 1 +} + +/* + * As transactions complete, print the number of statements we ran and the + * duration. + */ +nexus_db_queries$target:::transaction-done +/ts[copyinstr(arg0)]/ +{ + this->key = copyinstr(arg0); + this->conn_id = json(this->key, "ok"); + this->latency = (timestamp - ts[this->key]) / 1000; + this->n_statements = n_statements[this->key]; + printf( + "%s %d statement(s) in transaction '%s' on connection %s (%d us)\n", + arg2 ? "COMMIT" : "ROLLBACK", + n_statements[this->key], + transaction_names[this->key], + this->conn_id, + this->latency + ); + ts[this->key] = 0; + n_statements[this->key] = 0; + transaction_names[this->key] = 0; +} diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index 31677ed8c1..678170b25e 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -63,6 +63,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } @@ -182,6 +183,7 @@ generic-array = { version = "0.14.7", default-features = false, features = ["mor getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } hashbrown = { version = "0.15.1" } +heck = { version = "0.4.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } From 3449a645add39ea8d857556c49d4bbb522cd92b6 Mon Sep 17 00:00:00 2001 From: David Pacheco Date: Sat, 14 Dec 2024 12:14:46 -0800 Subject: [PATCH 08/10] decide whether each API should be server- or client-side-versioned (#7138) --- .github/buildomat/build-and-test.sh | 3 +- dev-tools/ls-apis/README.adoc | 24 ++ dev-tools/ls-apis/api-manifest.toml | 76 ++++ dev-tools/ls-apis/src/api_metadata.rs | 34 ++ dev-tools/ls-apis/src/bin/ls-apis.rs | 99 ++++- dev-tools/ls-apis/src/lib.rs | 1 + dev-tools/ls-apis/src/system_apis.rs | 364 ++++++++++++++++++- dev-tools/ls-apis/tests/api_dependencies.out | 1 - 8 files changed, 591 insertions(+), 11 deletions(-) diff --git a/.github/buildomat/build-and-test.sh b/.github/buildomat/build-and-test.sh index 605bf91b01..369274eb7a 100755 --- a/.github/buildomat/build-and-test.sh +++ b/.github/buildomat/build-and-test.sh @@ -69,7 +69,8 @@ banner ls-apis source ./tools/include/force-git-over-https.sh; ptime -m cargo xtask 
ls-apis apis && ptime -m cargo xtask ls-apis deployment-units && - ptime -m cargo xtask ls-apis servers + ptime -m cargo xtask ls-apis servers && + ptime -m cargo xtask ls-apis check ) # diff --git a/dev-tools/ls-apis/README.adoc b/dev-tools/ls-apis/README.adoc index 00c229bff2..6b1aa1cdea 100644 --- a/dev-tools/ls-apis/README.adoc +++ b/dev-tools/ls-apis/README.adoc @@ -104,6 +104,30 @@ You can generate a PNG image of the graph like this: $ dot -T png -o deployment-units.png deployment-units.dot ``` +=== Checking the upgrade DAG + +As of this writing: + +* All Dropshot/Progenitor APIs in the system are identified with this tool (as far as we know). +* Every API is annotated with metadata in `api-manifest.toml`. +* The API metadata specifies whether versioning for the API is managed on the server side exclusively or using client-side versioning as well. We've made this choice for every existing API. +* There are no cycles in the server-side-managed-API dependency graph. + +This tool verifies these properties. If you add a new API or a new API dependency without adding valid metadata, or if that metadata breaks these constraints, you will have to fix this up before you can land the change to Omicron. + +You can verify the versioning metadata using: + +``` +$ cargo xtask ls-apis check +``` + +You might see any of the following errors: + +* `+missing field `versioned_how`+`: this probably means that you added an API to the metadata but neglected to specify the `versioned_how` field. This must be either `client` or `server`. In general, prefer `server`. The tool should tell you if you try to use `server` but that can't work. If in doubt, ask the update team. +* `at least one API has unknown version strategy (see above)`: this probably means you added an API with `version_how = "unknown"`. (The specific API(s) with this problem are listed immediately above this error.) You need to instead decide if it should be client-side or server-side versioned. If in doubt, ask the update team. +* `+missing field `versioned_how_reason`+`: this probably means that you added an API with `versioned_how = "client"`, but did not add a `versioned_how_reason`. This field is required. It's a free-text field that explains why we couldn't use server-side versioning for this API. +* `graph of server-managed components has a cycle (includes node: "omicron-nexus")`: this probably means that you added an API with `versioned_how = "server"`, but that created a circular dependency with other server-side-versioned components. You'll need to break the cycle by changing one of these components (probably your new one) to be client-managed instead. +* `API identified by client package "nexus-client" (Nexus Internal API) is the "client" in a "non-dag" dependency rule, but its "versioned_how" is not "client"` (the specific APIs may be different): this (unlikely) condition means just what it says: the API manifest file contains a dependency rule saying that some API is *not* part of the update DAG, but that API is not marked accordingly. See the documentation below on filter rules. == Details diff --git a/dev-tools/ls-apis/api-manifest.toml b/dev-tools/ls-apis/api-manifest.toml index 766708f478..75f4777a48 100644 --- a/dev-tools/ls-apis/api-manifest.toml +++ b/dev-tools/ls-apis/api-manifest.toml @@ -132,6 +132,28 @@ packages = [ "oximeter-collector" ] # # `server_package_name`: the package that contains the Dropshot API definition. 
# +# `versioned_how`: one of: +# +# "server": this component uses server-side versioning only, meaning that the +# update system can ensure that servers are always updated before +# clients. +# +# "client": this component uses client-side versioning, meaning that the +# update system cannot ensure that servers are always updated +# before clients, so clients need to deal with multiple different +# server versions. +# +# You must also specify `versioned_how_reason` if you use "client". +# +# "unknown": this has not been determined. This will break tests. But the +# tooling supports this value so that during development you can +# relax these constraints. +# +# [`versioned_how_reason`]: free text string explaining why `versioned_how` must +# be "client" for this API. This is printed in documentation and command output +# and serves as documentation for developers to understand why we made this +# choice. This should be present only if `versioned_how = "client"`. +# # [`notes`]: optional free-form human-readable summary documentation about this # API ################################################################################ @@ -140,11 +162,14 @@ packages = [ "oximeter-collector" ] client_package_name = "bootstrap-agent-client" label = "Bootstrap Agent" server_package_name = "bootstrap-agent-api" +versioned_how = "client" +versioned_how_reason = "depends on itself (i.e., instances call each other)" [[apis]] client_package_name = "clickhouse-admin-keeper-client" label = "Clickhouse Cluster Admin for Keepers" server_package_name = "clickhouse-admin-api" +versioned_how = "server" notes = """ This is the server running inside multi-node Clickhouse keeper zones that's \ responsible for local configuration and monitoring. @@ -154,6 +179,7 @@ responsible for local configuration and monitoring. client_package_name = "clickhouse-admin-server-client" label = "Clickhouse Cluster Admin for Servers" server_package_name = "clickhouse-admin-api" +versioned_how = "server" notes = """ This is the server running inside multi-node Clickhouse server zones that's \ responsible for local configuration and monitoring. @@ -163,6 +189,7 @@ responsible for local configuration and monitoring. client_package_name = "clickhouse-admin-single-client" label = "Clickhouse Single-Node Cluster Admin" server_package_name = "clickhouse-admin-api" +versioned_how = "server" notes = """ This is the server running inside single-node Clickhouse server zones that's \ responsible for local configuration and monitoring. @@ -172,6 +199,7 @@ responsible for local configuration and monitoring. client_package_name = "cockroach-admin-client" label = "CockroachDB Cluster Admin" server_package_name = "cockroach-admin-api" +versioned_how = "server" notes = """ This is the server running inside CockroachDB zones that performs \ configuration and monitoring that requires the `cockroach` CLI. @@ -181,11 +209,14 @@ configuration and monitoring that requires the `cockroach` CLI. 
client_package_name = "crucible-agent-client" label = "Crucible Agent" server_package_name = "crucible-agent" +versioned_how = "server" [[apis]] client_package_name = "repair-client" label = "Crucible Repair" server_package_name = "crucible-downstairs" +versioned_how = "client" +versioned_how_reason = "depends on itself (i.e., instances call each other)" notes = """ The repair service offered by a crucible-downstairs supports both repairing \ one downstairs from another, and making a clone of a read-only downstairs \ @@ -196,11 +227,13 @@ when creating a new region in the crucible agent. client_package_name = "crucible-pantry-client" label = "Crucible Pantry" server_package_name = "crucible-pantry" +versioned_how = "server" [[apis]] client_package_name = "ddm-admin-client" label = "Maghemite DDM Admin" server_package_name = "ddmd" +versioned_how = "server" notes = """ The `ddmd` server runs in each sled GZ and each switch zone. These daemons \ provide an interface for advertising network prefixes, and observing what \ @@ -216,11 +249,17 @@ observability APIs in the future. client_package_name = "dns-service-client" label = "DNS Server" server_package_name = "dns-server-api" +versioned_how = "server" [[apis]] client_package_name = "dpd-client" label = "Dendrite DPD" server_package_name = "dpd" +# TODO We might need to pick this apart more carefully. DPD invokes and is +# invoked by several different services. It's possible that some of them can +# treat it as server-managed. +versioned_how = "client" +versioned_how_reason = "Sled Agent calling DPD creates a circular dependency" notes = """ Dendrite's data plane daemon (`dpd`) is the interface to configure and manage \ the rack switches. It's consumed by sled-agent to get the rack off the \ @@ -235,17 +274,29 @@ repo is not currently open source. client_package_name = "gateway-client" label = "Management Gateway Service" server_package_name = "gateway-api" +versioned_how = "server" notes = "Wicketd is deployed in a unit with MGS so we can ignore that one." [[apis]] client_package_name = "installinator-client" label = "Wicketd Installinator" server_package_name = "installinator-api" +# The installinator-client is used only by Installinator. This is part of the +# recovery OS image. Today, this is not really "shipped" in the traditional +# sense. The only way it gets used today is by an operator uploading it to +# Wicket and then performing a MUPdate. In this sense, we do not control the +# client, so we mark this client-managed -- it's up to the operator to make sure +# they're using a TUF repo whose recovery image contains an Installinator that's +# compatible with the API served by Wicket on the deployed system. In practice, +# we're almost certainly just going to avoid changing this API. +versioned_how = "client" +versioned_how_reason = "client is provided implicitly by the operator" [[apis]] client_package_name = "mg-admin-client" label = "Maghemite MG Admin" server_package_name = "mgd" +versioned_how = "server" notes = """ The `mgd` daemon runs in each switch zone. This daemon is responsible for all \ external route management for a switch. It provides interfaces for static \ @@ -258,17 +309,25 @@ bring the rack up. client_package_name = "nexus-client" label = "Nexus Internal API" server_package_name = "nexus-internal-api" +# nexus-client has to be client-versioned today because it's got a cyclic +# dependency with sled-agent-client, which is server-versioned. 
+versioned_how = "client" +versioned_how_reason = "Circular dependencies between Nexus and other services" [[apis]] client_package_name = "oxide-client" label = "External API" server_package_name = "nexus-external-api" +# The versioning strategy for the external API is outside the scope of this +# tool. It doesn't matter what we put here. +versioned_how = "server" notes = "Special case, since we don't fully control all clients" [[apis]] client_package_name = "oximeter-client" label = "Oximeter" server_package_name = "oximeter-api" +versioned_how = "server" notes = """ Shared types for this interface are in `omicron-common`. The producer makes \ requests to Nexus, and receives them from `oximeter-collector`. \ @@ -281,6 +340,7 @@ and makes the periodic renewal requests to `oximeter-collector`. client_package_name = "propolis-client" label = "Propolis" server_package_name = "propolis-server" +versioned_how = "server" notes = """ Sled Agent is deployed in a unit with Propolis so we can ignore that one. """ @@ -289,16 +349,20 @@ Sled Agent is deployed in a unit with Propolis so we can ignore that one. client_package_name = "sled-agent-client" label = "Sled Agent" server_package_name = "sled-agent-api" +versioned_how = "server" [[apis]] client_package_name = "repo-depot-client" label = "Repo Depot API" server_package_name = "repo-depot-api" +versioned_how = "client" +versioned_how_reason = "depends on itself (i.e., instances call each other)" [[apis]] client_package_name = "wicketd-client" label = "Wicketd" server_package_name = "wicketd-api" +versioned_how = "server" notes = """ wicketd-client is only used by wicket, which is deployed in a unit with wicketd. """ @@ -307,6 +371,7 @@ wicketd-client is only used by wicket, which is deployed in a unit with wicketd. client_package_name = "crucible-control-client" label = "Crucible Control (for testing only)" server_package_name = "crucible" +versioned_how = "server" notes = """ Exposed by Crucible upstairs for debugging via the `cmon` debugging tool. """ @@ -315,6 +380,7 @@ Exposed by Crucible upstairs for debugging via the `cmon` debugging tool. client_package_name = "dsc-client" label = "Downstairs Controller (debugging only)" server_package_name = "dsc" +versioned_how = "server" dev_only = true notes = """ `dsc` is a control program for spinning up and controlling instances of Crucible @@ -400,6 +466,16 @@ DNS server depends on itself only to provide TransientServer, which is not a deployed component. """ +[[dependency_filter_rules]] +ancestor = "omicron-sled-agent" +client = "sled-agent-client" +evaluation = "not-deployed" +note = """ +Sled Agent uses a Sled Agent client in two ways: RSS, which we don't care about +for the purpose of upgrade, and in the `zone-bundle` binary, which is not +deployed. +""" + # # "Bogus" dependencies. See above for details. # diff --git a/dev-tools/ls-apis/src/api_metadata.rs b/dev-tools/ls-apis/src/api_metadata.rs index 311e95549f..01f133debe 100644 --- a/dev-tools/ls-apis/src/api_metadata.rs +++ b/dev-tools/ls-apis/src/api_metadata.rs @@ -108,6 +108,20 @@ impl AllApiMetadata { Ok(which_rules[0].evaluation) } + + /// Returns the list of APIs that have non-DAG dependency rules + pub(crate) fn non_dag_apis(&self) -> impl Iterator { + self.dependency_rules.iter().filter_map(|(client_pkgname, rules)| { + rules.iter().any(|r| r.evaluation == Evaluation::NonDag).then( + || { + // unwrap(): we previously verified that the "client" for + // all dependency rules corresponds to an API that we have + // metadata for. 
+ self.apis.get(client_pkgname).unwrap() + }, + ) + }) + } } /// Format of the `api-manifest.toml` file @@ -186,6 +200,23 @@ impl TryFrom for AllApiMetadata { } } +/// Describes how an API in the system manages drift between client and server +#[derive(Deserialize, PartialEq, Eq)] +#[serde(rename_all = "snake_case")] +#[serde(tag = "versioned_how", content = "versioned_how_reason")] +pub enum VersionedHow { + /// We have not yet determined how this API will be versioned. + Unknown, + + /// This API will be versioned solely on the server. (The update system + /// will ensure that servers are always updated before clients.) + Server, + + /// This API will be versioned on the client. (The update system cannot + /// ensure that servers are always updated before clients.) + Client(String), +} + /// Describes one API in the system #[derive(Deserialize)] pub struct ApiMetadata { @@ -199,6 +230,9 @@ pub struct ApiMetadata { pub server_package_name: ServerPackageName, /// human-readable notes about this API pub notes: Option, + /// describes how we've decided this API will be versioned + #[serde(default, flatten)] + pub versioned_how: VersionedHow, /// If `dev_only` is true, then this API's server is not deployed in a /// production system. It's only used in development environments. The /// default is that APIs *are* deployed. diff --git a/dev-tools/ls-apis/src/bin/ls-apis.rs b/dev-tools/ls-apis/src/bin/ls-apis.rs index 9d2a5b5349..ea0ae48cd4 100644 --- a/dev-tools/ls-apis/src/bin/ls-apis.rs +++ b/dev-tools/ls-apis/src/bin/ls-apis.rs @@ -4,12 +4,12 @@ //! Show information about Progenitor-based APIs -use anyhow::{Context, Result}; +use anyhow::{bail, Context, Result}; use camino::Utf8PathBuf; use clap::{Args, Parser, Subcommand}; use omicron_ls_apis::{ AllApiMetadata, ApiDependencyFilter, LoadArgs, ServerComponentName, - SystemApis, + SystemApis, VersionedHow, }; use parse_display::{Display, FromStr}; @@ -34,6 +34,8 @@ enum Cmds { Adoc, /// print out each API, what exports it, and what consumes it Apis(ShowDepsArgs), + /// check the update DAG and propose changes + Check, /// print out APIs exported and consumed by each deployment unit DeploymentUnits(DotArgs), /// print out APIs exported and consumed, by server component @@ -81,6 +83,7 @@ fn main() -> Result<()> { match cli_args.cmd { Cmds::Adoc => run_adoc(&apis), Cmds::Apis(args) => run_apis(&apis, args), + Cmds::Check => run_check(&apis), Cmds::DeploymentUnits(args) => run_deployment_units(&apis, args), Cmds::Servers(args) => run_servers(&apis, args), } @@ -93,12 +96,13 @@ fn run_adoc(apis: &SystemApis) -> Result<()> { println!( ".List of OpenAPI/Progenitor-based interfaces for online upgrade." 
); - println!(r#"[cols="1h,2,2,2a,2", options="header"]"#); + println!(r#"[cols="1h,2,2,2a,2,2", options="header"]"#); println!("|==="); println!("|API"); println!("|Server location (`repo:path`)"); println!("|Client packages (`repo:path`)"); println!("|Consumers (`repo:path`; excluding omdb and tests)"); + println!("|Versioning"); println!("|Notes"); println!(""); @@ -122,6 +126,14 @@ fn run_adoc(apis: &SystemApis) -> Result<()> { println!("* {}", apis.adoc_label(c)?); } + match &api.versioned_how { + VersionedHow::Unknown => println!("|TBD"), + VersionedHow::Server => println!("|Server-side only"), + VersionedHow::Client(reason) => { + println!("|Client-side ({})", reason); + } + }; + print!("|{}", api.notes.as_deref().unwrap_or("-\n")); println!(""); } @@ -261,3 +273,84 @@ impl TryFrom<&LsApis> for LoadArgs { Ok(LoadArgs { api_manifest_path }) } } + +fn run_check(apis: &SystemApis) -> Result<()> { + let dag_check = apis.dag_check()?; + + for (pkg, reasons) in dag_check.proposed_server_managed() { + println!( + "proposal: make {:?} server-managed: {}", + pkg, + reasons.join(", ") + ); + } + + for (pkg, reasons) in dag_check.proposed_client_managed() { + println!( + "proposal: make {:?} client-managed: {}", + pkg, + reasons.join(", ") + ); + } + + for (pkg1, pkg2) in dag_check.proposed_upick() { + println!( + "proposal: choose either {:?} or {:?} to be client-managed \ + (they directly depend on each other)", + pkg1, pkg2, + ); + } + + println!("\n"); + println!("Server-managed APIs:\n"); + for api in apis + .api_metadata() + .apis() + .filter(|f| f.deployed() && f.versioned_how == VersionedHow::Server) + { + println!( + " {} ({}, exposed by {})", + api.label, + api.client_package_name, + apis.api_producer(&api.client_package_name).unwrap() + ); + } + + println!("\n"); + println!("Client-managed API:\n"); + for api in apis.api_metadata().apis().filter(|f| f.deployed()) { + if let VersionedHow::Client(reason) = &api.versioned_how { + println!( + " {} ({}, exposed by {})", + api.label, + api.client_package_name, + apis.api_producer(&api.client_package_name).unwrap() + ); + println!(" reason: {}", reason); + } + } + + println!("\n"); + print!("APIs with unknown version management:"); + let unknown: Vec<_> = apis + .api_metadata() + .apis() + .filter(|f| f.versioned_how == VersionedHow::Unknown) + .collect(); + if unknown.is_empty() { + println!(" none"); + } else { + println!("\n"); + for api in unknown { + println!( + " {} ({}, exposed by {})", + api.label, + api.client_package_name, + apis.api_producer(&api.client_package_name).unwrap() + ); + } + bail!("at least one API has unknown version strategy (see above)"); + } + + Ok(()) +} diff --git a/dev-tools/ls-apis/src/lib.rs b/dev-tools/ls-apis/src/lib.rs index 8d4f760c8b..c27606c25f 100644 --- a/dev-tools/ls-apis/src/lib.rs +++ b/dev-tools/ls-apis/src/lib.rs @@ -10,6 +10,7 @@ mod system_apis; mod workspaces; pub use api_metadata::AllApiMetadata; +pub use api_metadata::VersionedHow; pub use system_apis::ApiDependencyFilter; pub use system_apis::SystemApis; diff --git a/dev-tools/ls-apis/src/system_apis.rs b/dev-tools/ls-apis/src/system_apis.rs index 5d4a84160a..ef6dae0e43 100644 --- a/dev-tools/ls-apis/src/system_apis.rs +++ b/dev-tools/ls-apis/src/system_apis.rs @@ -8,6 +8,7 @@ use crate::api_metadata::AllApiMetadata; use crate::api_metadata::ApiMetadata; use crate::api_metadata::Evaluation; +use crate::api_metadata::VersionedHow; use crate::cargo::DepPath; use crate::parse_toml_file; use crate::workspaces::Workspaces; @@ -16,11 +17,13 @@ 
use crate::DeploymentUnitName; use crate::LoadArgs; use crate::ServerComponentName; use crate::ServerPackageName; -use anyhow::{anyhow, bail, Context, Result}; +use anyhow::Result; +use anyhow::{anyhow, bail, Context}; use camino::Utf8PathBuf; use cargo_metadata::Package; use parse_display::{Display, FromStr}; use petgraph::dot::Dot; +use petgraph::graph::NodeIndex; use std::collections::BTreeMap; use std::collections::BTreeSet; @@ -369,12 +372,27 @@ impl SystemApis { &self, filter: ApiDependencyFilter, ) -> Result { + let (graph, _nodes) = self.make_component_graph(filter, false)?; + Ok(Dot::new(&graph).to_string()) + } + + // The complex type below is only used in this one place: the return value + // of an internal helper function. A type alias doesn't seem better. + #[allow(clippy::type_complexity)] + fn make_component_graph( + &self, + dependency_filter: ApiDependencyFilter, + versioned_on_server_only: bool, + ) -> Result<( + petgraph::graph::Graph<&ServerComponentName, &ClientPackageName>, + BTreeMap<&ServerComponentName, NodeIndex>, + )> { let mut graph = petgraph::graph::Graph::new(); let nodes: BTreeMap<_, _> = self .server_component_units .keys() .map(|server_component| { - (server_component.clone(), graph.add_node(server_component)) + (server_component, graph.add_node(server_component)) }) .collect(); @@ -383,16 +401,350 @@ impl SystemApis { for server_component in self.apis_consumed.keys() { // unwrap(): we created a node for each server component above. let my_node = nodes.get(server_component).unwrap(); - let consumed_apis = - self.component_apis_consumed(server_component, filter)?; + let consumed_apis = self + .component_apis_consumed(server_component, dependency_filter)?; for (client_pkg, _) in consumed_apis { + if versioned_on_server_only { + let api = self + .api_metadata + .client_pkgname_lookup(client_pkg) + .unwrap(); + if api.versioned_how != VersionedHow::Server { + continue; + } + } + let other_component = self.api_producer(client_pkg).unwrap(); let other_node = nodes.get(other_component).unwrap(); - graph.add_edge(*my_node, *other_node, client_pkg.clone()); + graph.add_edge(*my_node, *other_node, client_pkg); } } - Ok(Dot::new(&graph).to_string()) + Ok((graph, nodes)) + } + + /// Verifies various important properties about the assignment of which APIs + /// are server-managed vs. client-managed. + /// + /// Returns a structure with proposals for how to assign APIs that are + /// currently unassigned. + pub fn dag_check(&self) -> Result> { + // In this function, we'll use the following ApiDependencyFilter a bunch + // when walking the component dependency graph. "Default" is the + // correct filter to use here. This excludes relationships that are + // totally bogus, only affect components that are never actually + // deployed, or are part of an edge that we've already determined will + // be "non-DAG". + // + // This last case might be a little confusing. The whole point of this + // function is to help developers figure out which edges should be part + // of the DAG or not. Why would we ignore edges based on whether + // they're already in the DAG or not? + // + // Recall that there are two ways that the metadata can specify that a + // particular API is "not part of the update DAG" (which is equivalent + // to client-side-managed): + // + // - a specific class of Cargo dependencies can be marked "non-DAG" via + // a dependency filter rule. 
The only case of this today is where we + // say that Cargo dependencies from "oximeter-producer" to + // "nexus-client" are "non-DAG". This means we promise to make the + // Nexus internal API client-managed (i.e., not part of the update + // DAG). We verify this promise below. + // - a specific API can be marked as server-managed (meaning it's part + // of the update DAG) or not. That's most of what this function deals + // with and proposes changes to. + // + // In the long term, it might be nice to combine these. But that's more + // work than it sounds like: we'd probably want to convert everything + // to filter rules, but that requires (tediously) writing out every + // single edge that we care about. An alternative would be to eliminate + // the non-DAG dependency filter rules and only use the property at the + // API level. However right now it seems quite possible that we do want + // this on a per-edge basis, rather than a per-API basis (i.e., there + // are some client-side-versioned APIs that have consumers that could + // treat them as server-side-versioned). + // + // Anyway, what we're talking about here is ignoring the first category + // of information and looking only at the second. This is *safe* (i.e., + // correct) because we verify below that the second category (the + // API-level `versioned_for` property) contains the same information + // provided by the first category (the non-DAG dependency filter rules). + // We *choose* to do this because it makes the heuristics below more + // useful. For example, excluding the non-DAG edges makes it easy for + // the heuristic below to tell that crucible-pantry ought to be + // server-side-managed because it has no (other) dependencies and so + // can't be part of a cycle. + let filter = ApiDependencyFilter::Default; + + // Construct a graph where: + // + // - nodes are all the API producer and consumer components + // - we only include edges *to* components that produce server-managed + // APIs + // + // Check if this DAG is cyclic. This can't be made to work. + let (graph, nodes) = self.make_component_graph(filter, true)?; + let reverse_nodes: BTreeMap<_, _> = + nodes.iter().map(|(s_c, node)| (node, s_c)).collect(); + if let Err(error) = petgraph::algo::toposort(&graph, None) { + bail!( + "graph of server-managed components has a cycle (includes \ + node: {:?})", + reverse_nodes.get(&error.node_id()).unwrap() + ); + } + + // Verify that the targets of any "non-dag" dependency filter rules are + // indeed not part of the server-side-versioned DAG. + for api in self.api_metadata.non_dag_apis() { + if !matches!(api.versioned_how, VersionedHow::Client(..)) { + bail!( + "API identified by client package {:?} ({}) is the \ + \"client\" in a \"non-dag\" dependency rule, but its \ + \"versioned_how\" is not \"client\"", + api.client_package_name, + api.label, + ); + } + } + + // Use some heuristics to propose next steps. + // + // We're only looking for possible next steps here -- we don't have to + // programmatically figure out the whole graph. + + let mut dag_check = DagCheck::new(); + + for api in self.api_metadata.apis() { + if !api.deployed() { + if api.versioned_how == VersionedHow::Unknown { + dag_check.propose_server( + &api.client_package_name, + String::from("not produced by a deployed component"), + ); + } + continue; + } + let producer = self.api_producer(&api.client_package_name).unwrap(); + let apis_consumed: BTreeSet<_> = self + .component_apis_consumed(producer, filter)? 
+ .map(|(client_pkgname, _dep_path)| client_pkgname) + .collect(); + let dependents: BTreeSet<_> = self + .api_consumers(&api.client_package_name, filter) + .unwrap() + .map(|(dependent, _dep_path)| dependent) + .collect(); + + if api.versioned_how == VersionedHow::Unknown { + // If we haven't determined how to manage versioning on this + // API, and it has no dependencies on "unknown" or + // client-managed APIs, then it can be made server-managed. + if !apis_consumed.iter().any(|client_pkgname| { + let api = self + .api_metadata + .client_pkgname_lookup(*client_pkgname) + .unwrap(); + api.versioned_how != VersionedHow::Server + }) { + dag_check.propose_server( + &api.client_package_name, + String::from( + "has no unknown or client-managed dependencies", + ), + ); + } else if apis_consumed.contains(&api.client_package_name) { + // If this thing depends on itself, it must be + // client-managed. + dag_check.propose_client( + &api.client_package_name, + String::from("depends on itself"), + ); + } else if dependents.is_empty() { + // If something has no consumers in deployed components, it + // can be server-managed. (These are generally debug APIs.) + dag_check.propose_server( + &api.client_package_name, + String::from( + "has no consumers among deployed components", + ), + ); + } + + continue; + } + + let dependencies: BTreeMap<_, _> = apis_consumed + .iter() + .map(|dependency_clientpkg| { + ( + self.api_producer(dependency_clientpkg).unwrap(), + *dependency_clientpkg, + ) + }) + .collect(); + + // Look for one-step circular dependencies (i.e., API API A1 is + // produced by component C1, which uses API A2 produced by C2, which + // also uses A1). In such cases, either A1 or A2 must be + // client-managed (or both). + for other_pkgname in dependents { + if let Some(dependency_clientpkg) = + dependencies.get(other_pkgname) + { + let dependency_api = self + .api_metadata + .client_pkgname_lookup(*dependency_clientpkg) + .unwrap(); + + // If we're looking at a server-managed dependency and the + // other is unknown, then that one should be client-managed. + // + // Without loss of generality, we can ignore the reverse + // case (because we will catch that case when we're + // iterating over the dependency API). + if api.versioned_how == VersionedHow::Server + && dependency_api.versioned_how == VersionedHow::Unknown + { + dag_check.propose_client( + dependency_clientpkg, + format!( + "has cyclic dependency on {:?}, which is \ + server-managed", + api.client_package_name, + ), + ) + } + + // If both are Unknown, tell the user to pick one. 
+ if api.versioned_how == VersionedHow::Unknown + && dependency_api.versioned_how == VersionedHow::Unknown + { + dag_check.propose_upick( + &api.client_package_name, + dependency_clientpkg, + ); + } + } + } + } + + Ok(dag_check) + } +} + +/// Describes proposals for assigning how APIs should be versioned, based on +/// heuristics applied while checking the DAG +pub struct DagCheck<'a> { + /// set of APIs (identified by client package name) that we propose should + /// be server-managed, along with a list of reasons why we think so + proposed_server_managed: BTreeMap<&'a ClientPackageName, Vec>, + /// set of APIs (identified by client package name) that we propose should + /// be client-managed, along with a list of reasons why we think so + proposed_client_managed: BTreeMap<&'a ClientPackageName, Vec>, + /// set of pairs of APIs where we propose that the user must pick one + /// package in each pair to be client-managed (because the two packages have + /// a mutual dependency) + /// + /// The ordering in these pairs is not semantically significant. The + /// implementation will ensure that each pair of packages is represented at + /// most once in this structure. + proposed_upick: + BTreeMap<&'a ClientPackageName, BTreeSet<&'a ClientPackageName>>, +} + +impl<'a> DagCheck<'a> { + fn new() -> DagCheck<'a> { + DagCheck { + proposed_server_managed: BTreeMap::new(), + proposed_client_managed: BTreeMap::new(), + proposed_upick: BTreeMap::new(), + } + } + + fn propose_client( + &mut self, + client_pkgname: &'a ClientPackageName, + reason: String, + ) { + self.proposed_client_managed + .entry(client_pkgname) + .or_insert_with(Vec::new) + .push(reason); + } + + fn propose_server( + &mut self, + client_pkgname: &'a ClientPackageName, + reason: String, + ) { + self.proposed_server_managed + .entry(client_pkgname) + .or_insert_with(Vec::new) + .push(reason); + } + + /// Propose that one of these two packages should be client-managed (because + /// they depend on each other, so they can't both be server-managed). + fn propose_upick( + &mut self, + client_pkgname1: &'a ClientPackageName, + client_pkgname2: &'a ClientPackageName, + ) { + // A "upick" is a situation where you (the person running the tool) + // should choose either of `pkg1` or `pkg2` to be client-managed. The + // caller will identify this situation twice: once when looking at + // `pkg1` and once when looking at `pkg2`. But we only want to report + // it once. So we'll ignore duplicates here because it's easier here + // than in the caller. + // + // To do that, first check whether the caller has already proposed this + // "upick" with the packages in the other order. If so, do nothing. + if let Some(other_pkg_upicks) = self.proposed_upick.get(client_pkgname2) + { + if other_pkg_upicks.contains(client_pkgname1) { + return; + } + } + + // Now go ahead and insert the pair in this order. This construction + // will also do nothing if this same pair has already been inserted in + // this order. 
+ self.proposed_upick + .entry(client_pkgname1) + .or_insert_with(BTreeSet::new) + .insert(client_pkgname2); + } + + /// Returns a list of APIs (identified by client package name) that look + /// like they could use server-side versioning, along with reasons + pub fn proposed_server_managed( + &self, + ) -> impl Iterator)> { + self.proposed_server_managed.iter().map(|(c, r)| (*c, r)) + } + + /// Returns a list of APIs (identified by client package name) that look + /// like they should use client-side versioning, along with reasons + pub fn proposed_client_managed( + &self, + ) -> impl Iterator)> { + self.proposed_client_managed.iter().map(|(c, r)| (*c, r)) + } + + /// Returns a list of pairs of APIs (identified by client package names) for + /// which we have not yet picked client-side or server-side versioning and + /// where there is a direct mutual dependency + /// + /// At least one of these APIs will need to be marked client-managed. + pub fn proposed_upick( + &self, + ) -> impl Iterator + { + self.proposed_upick + .iter() + .flat_map(|(c, others)| others.iter().map(|o| (*c, *o))) } } diff --git a/dev-tools/ls-apis/tests/api_dependencies.out b/dev-tools/ls-apis/tests/api_dependencies.out index 8cd2f0a085..8e061f2906 100644 --- a/dev-tools/ls-apis/tests/api_dependencies.out +++ b/dev-tools/ls-apis/tests/api_dependencies.out @@ -79,7 +79,6 @@ Repo Depot API (client: repo-depot-client) Sled Agent (client: sled-agent-client) consumed by: dpd (dendrite/dpd) via 1 path consumed by: omicron-nexus (omicron/nexus) via 7 paths - consumed by: omicron-sled-agent (omicron/sled-agent) via 1 path Wicketd (client: wicketd-client) From 85feb0952e1fde1ef322d970e3f41145f9a0caf4 Mon Sep 17 00:00:00 2001 From: Andy Fiddaman Date: Mon, 16 Dec 2024 11:21:35 +0000 Subject: [PATCH 09/10] Ensure services do not start in /root (#7166) By default, an SMF service which has a non-trivial context will start in the user's home directory. For services started as 'root' this means /root. While a lot of services will subsequently chdir elsewhere, we want to avoid anything using /root, even briefly, as it will soon be a separate ZFS dataset on an Oxide system. 
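Concretely, the change pins the start directory via the SMF method context. The snippet below is an illustrative sketch, not the verbatim manifest diff: the `working_directory` attribute on `method_context` overrides the default of starting in the method user's home directory, and the path and exec values shown here are example placeholders only.

```xml
<!--
  Illustrative sketch only: pinning working_directory keeps a root-owned
  service from starting in /root. The attribute value and exec path are
  examples, not necessarily the ones used in this change.
-->
<exec_method type="method" name="start"
             exec="/opt/oxide/example/start.sh" timeout_seconds="0">
  <method_context working_directory="/var/tmp">
    <method_credential user="root" group="root" />
  </method_context>
</exec_method>
```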
--- smf/chrony-setup/manifest.xml | 2 +- smf/cockroachdb/manifest.xml | 2 +- smf/ntp/manifest/manifest.xml | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/smf/chrony-setup/manifest.xml b/smf/chrony-setup/manifest.xml index 7882693495..8127ca80df 100644 --- a/smf/chrony-setup/manifest.xml +++ b/smf/chrony-setup/manifest.xml @@ -14,7 +14,7 @@ - + diff --git a/smf/cockroachdb/manifest.xml b/smf/cockroachdb/manifest.xml index 67ddbe48b8..6ed18b1bca 100644 --- a/smf/cockroachdb/manifest.xml +++ b/smf/cockroachdb/manifest.xml @@ -19,7 +19,7 @@ - + diff --git a/smf/ntp/manifest/manifest.xml b/smf/ntp/manifest/manifest.xml index 5b45406a47..8fa1b4ebc4 100644 --- a/smf/ntp/manifest/manifest.xml +++ b/smf/ntp/manifest/manifest.xml @@ -66,7 +66,7 @@ - + From ca21fe769b0f1c3f08fd8858bc53d5c207b2f7dc Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 16 Dec 2024 09:04:00 -0500 Subject: [PATCH 10/10] [reconfigurator] SledEditor: be more strict about decommissioned sleds (#7234) This is a followup from https://github.com/oxidecomputer/omicron/pull/7204#discussion_r1870465378 and makes two changes, neither of which should affect behavior: * `SledEditor` will now fail if a caller attempts to make changes to a decommissioned sled (internally, this is statically enforced by a type state enum - a sled in the decommissioned state does not have any methods that support editing, so we're forced to return an error) * `SledEditor::set_state()` is now `SledEditor::decommission()`, and it performs some checks that the sled looks decommissionable The second bullet is more questionable than I expected it to be: 1. There are some arguments that `SledEditor` shouldn't do any checks here; in particular, it doesn't have the full context (e.g., any checks on "should we decommission this sled" that depend on the `PlanningInput` can't be performed here, because `SledEditor` intentionally doesn't have access to `PlanningInput`). 2. I wanted to check zones + disks + datasets, but in practice it can only check zones today; I left a comment (and the commented-out disks + datasets checks we should do) about why. I think we will eventually be able to turn these on; the current behavior of removing disks/datasets from the blueprint for expunged sleds will have to change to fix #7078, at which point these checks should be valid. I don't feel super strongly about the checks in `decommission()` or even this PR as a whole; if this doesn't look like a useful direction, I'd be fine with discarding it. Please review with a pretty critical eye. 
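The type-state enum mentioned in the first bullet works roughly as sketched below: only the active variant hands out an editor with mutating methods, so every edit funnels through one chokepoint and a decommissioned sled uniformly yields `SledEditError::EditDecommissioned`. This is a condensed illustration; the real editor in the diff below carries zones, disks, and datasets editors.

```rust
// Condensed sketch of the type-state pattern; payload types are simplified
// placeholders for the real editor state.
struct ActiveSledEditor; // would hold zones, disks, and datasets editors
struct EditedSled; // frozen, read-only result for a decommissioned sled

#[derive(Debug)]
enum SledEditError {
    EditDecommissioned,
}

enum SledEditor {
    Active(ActiveSledEditor),
    Decommissioned(EditedSled),
}

impl SledEditor {
    // Every mutating operation funnels through this accessor, so there is a
    // single place where edits to a decommissioned sled are rejected.
    fn as_active_mut(&mut self) -> Result<&mut ActiveSledEditor, SledEditError> {
        match self {
            SledEditor::Active(editor) => Ok(editor),
            SledEditor::Decommissioned(_) => Err(SledEditError::EditDecommissioned),
        }
    }

    fn add_zone(&mut self /* , zone: BlueprintZoneConfig */) -> Result<(), SledEditError> {
        let _active = self.as_active_mut()?;
        // ... perform the edit on the active editor ...
        Ok(())
    }
}
```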
--- Cargo.lock | 1 + nexus/reconfigurator/planning/Cargo.toml | 1 + .../planning/src/blueprint_builder/builder.rs | 46 ++-- .../src/blueprint_editor/sled_editor.rs | 255 +++++++++++++++++- .../blueprint_editor/sled_editor/datasets.rs | 6 +- nexus/reconfigurator/planning/src/planner.rs | 3 +- 6 files changed, 278 insertions(+), 34 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 6a44c7af3d..44ef4eccc5 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -6071,6 +6071,7 @@ dependencies = [ "indexmap 2.6.0", "internal-dns-resolver", "ipnet", + "itertools 0.13.0", "maplit", "nexus-config", "nexus-inventory", diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 43a65ad085..efed173a71 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -16,6 +16,7 @@ illumos-utils.workspace = true indexmap.workspace = true internal-dns-resolver.workspace = true ipnet.workspace = true +itertools.workspace = true nexus-config.workspace = true nexus-inventory.workspace = true nexus-sled-agent-shared.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index 394133132b..74e91ff943 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -228,6 +228,14 @@ pub struct SledEditCounts { } impl SledEditCounts { + pub fn zeroes() -> Self { + Self { + disks: EditCounts::zeroes(), + datasets: EditCounts::zeroes(), + zones: EditCounts::zeroes(), + } + } + fn has_nonzero_counts(&self) -> bool { let Self { disks, datasets, zones } = self; disks.has_nonzero_counts() @@ -548,7 +556,7 @@ impl<'a> BlueprintBuilder<'a> { generation: Generation::new(), datasets: BTreeMap::new(), }); - let editor = SledEditor::new( + let editor = SledEditor::for_existing( state, zones.clone(), disks, @@ -565,8 +573,7 @@ impl<'a> BlueprintBuilder<'a> { // that weren't in the parent blueprint. (These are newly-added sleds.) for sled_id in input.all_sled_ids(SledFilter::Commissioned) { if let Entry::Vacant(slot) = sled_editors.entry(sled_id) { - slot.insert(SledEditor::new_empty( - SledState::Active, + slot.insert(SledEditor::for_new_active( build_preexisting_dataset_ids(sled_id)?, )); } @@ -735,8 +742,8 @@ impl<'a> BlueprintBuilder<'a> { .retain(|sled_id, _| in_service_sled_ids.contains(sled_id)); // If we have the clickhouse cluster setup enabled via policy and we - // don't yet have a `ClickhouseClusterConfiguration`, then we must create - // one and feed it to our `ClickhouseAllocator`. + // don't yet have a `ClickhouseClusterConfiguration`, then we must + // create one and feed it to our `ClickhouseAllocator`. let clickhouse_allocator = if self.input.clickhouse_cluster_enabled() { let parent_config = self .parent_blueprint @@ -815,18 +822,18 @@ impl<'a> BlueprintBuilder<'a> { } /// Set the desired state of the given sled. - pub fn set_sled_state( + pub fn set_sled_decommissioned( &mut self, sled_id: SledUuid, - desired_state: SledState, ) -> Result<(), Error> { let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( "tried to set sled state for unknown sled {sled_id}" )) })?; - editor.set_state(desired_state); - Ok(()) + editor + .decommission() + .map_err(|err| Error::SledEditError { sled_id, err }) } /// Within tests, set an RNG for deterministic results. 
@@ -1046,15 +1053,18 @@ impl<'a> BlueprintBuilder<'a> { // blueprint for (disk_id, (zpool, disk)) in database_disks { database_disk_ids.insert(disk_id); - editor.ensure_disk( - BlueprintPhysicalDiskConfig { - disposition: BlueprintPhysicalDiskDisposition::InService, - identity: disk.disk_identity.clone(), - id: disk_id, - pool_id: *zpool, - }, - &mut self.rng, - ); + editor + .ensure_disk( + BlueprintPhysicalDiskConfig { + disposition: + BlueprintPhysicalDiskDisposition::InService, + identity: disk.disk_identity.clone(), + id: disk_id, + pool_id: *zpool, + }, + &mut self.rng, + ) + .map_err(|err| Error::SledEditError { sled_id, err })?; } // Remove any disks that appear in the blueprint, but not the database diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs index 13094b97a4..995cc306a5 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs @@ -7,18 +7,25 @@ use crate::blueprint_builder::SledEditCounts; use crate::planner::PlannerRng; use illumos_utils::zpool::ZpoolName; +use itertools::Either; +use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::BlueprintDatasetsConfig; use nexus_types::deployment::BlueprintPhysicalDiskConfig; use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; use nexus_types::deployment::BlueprintZoneType; use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::DiskFilter; use nexus_types::external_api::views::SledState; +use omicron_common::disk::DatasetKind; +use omicron_uuid_kinds::DatasetUuid; use omicron_uuid_kinds::OmicronZoneUuid; use omicron_uuid_kinds::PhysicalDiskUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::mem; mod datasets; mod disks; @@ -50,6 +57,32 @@ pub enum SledInputError { #[derive(Debug, thiserror::Error)] pub enum SledEditError { + #[error("editing a decommissioned sled is not allowed")] + EditDecommissioned, + #[error( + "sled is not decommissionable: \ + disk {disk_id} (zpool {zpool_id}) is in service" + )] + NonDecommissionableDiskInService { + disk_id: PhysicalDiskUuid, + zpool_id: ZpoolUuid, + }, + #[error( + "sled is not decommissionable: \ + dataset {dataset_id} (kind {kind:?}) is in service" + )] + NonDecommissionableDatasetInService { + dataset_id: DatasetUuid, + kind: DatasetKind, + }, + #[error( + "sled is not decommissionable: \ + zone {zone_id} (kind {kind:?}) is not expunged" + )] + NonDecommissionableZoneNotExpunged { + zone_id: OmicronZoneUuid, + kind: ZoneKind, + }, #[error("failed to edit disks")] EditDisks(#[from] DisksEditError), #[error("failed to edit datasets")] @@ -74,8 +107,196 @@ pub enum SledEditError { } #[derive(Debug)] -pub(crate) struct SledEditor { - state: SledState, +pub(crate) struct SledEditor(InnerSledEditor); + +#[derive(Debug)] +enum InnerSledEditor { + // Internally, `SledEditor` has a variant for each variant of `SledState`, + // as the operations allowed in different states are substantially different + // (i.e., an active sled allows any edit; a decommissioned sled allows + // none). 
+ Active(ActiveSledEditor), + Decommissioned(EditedSled), +} + +impl SledEditor { + pub fn for_existing( + state: SledState, + zones: BlueprintZonesConfig, + disks: BlueprintPhysicalDisksConfig, + datasets: BlueprintDatasetsConfig, + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Result { + match state { + SledState::Active => { + let inner = ActiveSledEditor::new( + zones, + disks, + datasets, + preexisting_dataset_ids, + )?; + Ok(Self(InnerSledEditor::Active(inner))) + } + SledState::Decommissioned => { + let inner = EditedSled { + state, + zones, + disks, + datasets, + edit_counts: SledEditCounts::zeroes(), + }; + Ok(Self(InnerSledEditor::Decommissioned(inner))) + } + } + } + + pub fn for_new_active( + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Self { + Self(InnerSledEditor::Active(ActiveSledEditor::new_empty( + preexisting_dataset_ids, + ))) + } + + pub fn finalize(self) -> EditedSled { + match self.0 { + InnerSledEditor::Active(editor) => editor.finalize(), + InnerSledEditor::Decommissioned(edited) => edited, + } + } + + pub fn edit_counts(&self) -> SledEditCounts { + match &self.0 { + InnerSledEditor::Active(editor) => editor.edit_counts(), + InnerSledEditor::Decommissioned(edited) => edited.edit_counts, + } + } + + pub fn decommission(&mut self) -> Result<(), SledEditError> { + match &mut self.0 { + InnerSledEditor::Active(editor) => { + // Decommissioning a sled is a one-way trip that has many + // preconditions. We can't check all of them here (e.g., we + // should kick the sled out of trust quorum before + // decommissioning, which is entirely outside the realm of + // `SledEditor`. But we can do some basic checks: all of the + // disks, datasets, and zones for this sled should be expunged. + editor.validate_decommisionable()?; + + // We can't take ownership of `editor` from the `&mut self` + // reference we have, and we need ownership to call + // `finalize()`. Steal the contents via `mem::swap()` with an + // empty editor. This isn't panic safe (i.e., if we panic + // between the `mem::swap()` and the reassignment to `self.0` + // below, we'll be left in the active state with an empty sled + // editor), but omicron in general is not panic safe and aborts + // on panic. Plus `finalize()` should never panic. + let mut stolen = ActiveSledEditor::new_empty( + DatasetIdsBackfillFromDb::empty(), + ); + mem::swap(editor, &mut stolen); + + let mut finalized = stolen.finalize(); + finalized.state = SledState::Decommissioned; + self.0 = InnerSledEditor::Decommissioned(finalized); + } + // If we're already decommissioned, there's nothing to do. 
+ InnerSledEditor::Decommissioned(_) => (), + } + Ok(()) + } + + pub fn disks( + &self, + filter: DiskFilter, + ) -> impl Iterator { + match &self.0 { + InnerSledEditor::Active(editor) => { + Either::Left(editor.disks(filter)) + } + InnerSledEditor::Decommissioned(edited) => Either::Right( + edited + .disks + .disks + .iter() + .filter(move |disk| disk.disposition.matches(filter)), + ), + } + } + + pub fn zones( + &self, + filter: BlueprintZoneFilter, + ) -> impl Iterator { + match &self.0 { + InnerSledEditor::Active(editor) => { + Either::Left(editor.zones(filter)) + } + InnerSledEditor::Decommissioned(edited) => Either::Right( + edited + .zones + .zones + .iter() + .filter(move |zone| zone.disposition.matches(filter)), + ), + } + } + + fn as_active_mut( + &mut self, + ) -> Result<&mut ActiveSledEditor, SledEditError> { + match &mut self.0 { + InnerSledEditor::Active(editor) => Ok(editor), + InnerSledEditor::Decommissioned(_) => { + Err(SledEditError::EditDecommissioned) + } + } + } + + pub fn ensure_disk( + &mut self, + disk: BlueprintPhysicalDiskConfig, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.ensure_disk(disk, rng); + Ok(()) + } + + pub fn expunge_disk( + &mut self, + disk_id: &PhysicalDiskUuid, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.expunge_disk(disk_id) + } + + pub fn add_zone( + &mut self, + zone: BlueprintZoneConfig, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.add_zone(zone, rng) + } + + pub fn expunge_zone( + &mut self, + zone_id: &OmicronZoneUuid, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.expunge_zone(zone_id) + } + + /// Backwards compatibility / test helper: If we're given a blueprint that + /// has zones but wasn't created via `SledEditor`, it might not have + /// datasets for all its zones. This method backfills them. + pub fn ensure_datasets_for_running_zones( + &mut self, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + self.as_active_mut()?.ensure_datasets_for_running_zones(rng) + } +} + +#[derive(Debug)] +struct ActiveSledEditor { zones: ZonesEditor, disks: DisksEditor, datasets: DatasetsEditor, @@ -90,16 +311,14 @@ pub(crate) struct EditedSled { pub edit_counts: SledEditCounts, } -impl SledEditor { +impl ActiveSledEditor { pub fn new( - state: SledState, zones: BlueprintZonesConfig, disks: BlueprintPhysicalDisksConfig, datasets: BlueprintDatasetsConfig, preexisting_dataset_ids: DatasetIdsBackfillFromDb, ) -> Result { Ok(Self { - state, zones: zones.try_into()?, disks: disks.try_into()?, datasets: DatasetsEditor::new(datasets, preexisting_dataset_ids)?, @@ -107,11 +326,9 @@ impl SledEditor { } pub fn new_empty( - state: SledState, preexisting_dataset_ids: DatasetIdsBackfillFromDb, ) -> Self { Self { - state, zones: ZonesEditor::empty(), disks: DisksEditor::empty(), datasets: DatasetsEditor::empty(preexisting_dataset_ids), @@ -123,7 +340,7 @@ impl SledEditor { let (datasets, datasets_counts) = self.datasets.finalize(); let (zones, zones_counts) = self.zones.finalize(); EditedSled { - state: self.state, + state: SledState::Active, zones, disks, datasets, @@ -135,6 +352,24 @@ impl SledEditor { } } + fn validate_decommisionable(&self) -> Result<(), SledEditError> { + // ... and all zones are expunged. 
+ if let Some(zone) = self.zones(BlueprintZoneFilter::All).find(|zone| { + match zone.disposition { + BlueprintZoneDisposition::InService + | BlueprintZoneDisposition::Quiesced => true, + BlueprintZoneDisposition::Expunged => false, + } + }) { + return Err(SledEditError::NonDecommissionableZoneNotExpunged { + zone_id: zone.id, + kind: zone.zone_type.kind(), + }); + } + + Ok(()) + } + pub fn edit_counts(&self) -> SledEditCounts { SledEditCounts { disks: self.disks.edit_counts(), @@ -143,10 +378,6 @@ impl SledEditor { } } - pub fn set_state(&mut self, new_state: SledState) { - self.state = new_state; - } - pub fn disks( &self, filter: DiskFilter, diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs index de397b9caa..211af59aab 100644 --- a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs @@ -7,7 +7,6 @@ use crate::planner::PlannerRng; use illumos_utils::zpool::ZpoolName; use nexus_types::deployment::BlueprintDatasetConfig; use nexus_types::deployment::BlueprintDatasetDisposition; -use nexus_types::deployment::BlueprintDatasetFilter; use nexus_types::deployment::BlueprintDatasetsConfig; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolFilter; @@ -24,6 +23,9 @@ use std::collections::BTreeMap; use std::collections::BTreeSet; use std::net::SocketAddrV6; +#[cfg(test)] +use nexus_types::deployment::BlueprintDatasetFilter; + #[derive(Debug, thiserror::Error)] #[error( "invalid blueprint input: multiple datasets with kind {kind:?} \ @@ -269,7 +271,7 @@ impl DatasetsEditor { self.counts } - #[allow(dead_code)] // currently only used by tests; this will change soon + #[cfg(test)] pub fn datasets( &self, filter: BlueprintDatasetFilter, diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 56fc671667..24ef80b253 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -159,8 +159,7 @@ impl<'a> Planner<'a> { let num_instances_assigned = 0; if all_zones_expunged && num_instances_assigned == 0 { - self.blueprint - .set_sled_state(sled_id, SledState::Decommissioned)?; + self.blueprint.set_sled_decommissioned(sled_id)?; } }
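The comment inside `decommission()` above describes stealing the active editor's contents with `mem::swap()` so that a consuming `finalize()` can run even though the method only holds `&mut self`. The following is a minimal, self-contained sketch of that pattern with hypothetical types (it is not code from this patch and assumes only the standard library):

    use std::mem;

    #[derive(Debug)]
    struct ActiveEditor {
        edits: usize,
    }

    impl ActiveEditor {
        fn new_empty() -> Self {
            ActiveEditor { edits: 0 }
        }

        // Consumes the editor, producing the finished record.
        fn finalize(self) -> Finished {
            Finished { edits: self.edits }
        }
    }

    #[derive(Debug)]
    struct Finished {
        edits: usize,
    }

    #[derive(Debug)]
    enum Editor {
        Active(ActiveEditor),
        Done(Finished),
    }

    impl Editor {
        fn finish(&mut self) {
            match self {
                Editor::Active(active) => {
                    // We only hold `&mut self`, but `finalize()` needs
                    // ownership. Swap an empty editor into place, then
                    // overwrite `self` with the finished state built from
                    // the stolen contents.
                    let mut stolen = ActiveEditor::new_empty();
                    mem::swap(active, &mut stolen);
                    *self = Editor::Done(stolen.finalize());
                }
                // Already finished: nothing to do.
                Editor::Done(_) => (),
            }
        }
    }

    fn main() {
        let mut editor = Editor::Active(ActiveEditor { edits: 3 });
        editor.finish();
        println!("{editor:?}"); // prints: Done(Finished { edits: 3 })
    }

As in the patch, the swap leaves a throwaway value behind for a moment, which is acceptable here because nothing observes the intermediate state before the reassignment.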
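Relatedly, the new `disks()` and `zones()` methods return a single `impl Iterator` from two match arms whose concrete iterator types differ, by wrapping each arm in `itertools::Either`. A small sketch of the same technique with hypothetical types (assuming only the `itertools` crate, which this file already imports):

    use itertools::Either;

    enum Numbers {
        Few(Vec<u32>),
        UpTo(u32),
    }

    impl Numbers {
        // Both arms must yield the same return type; Either::Left/Right
        // unify the two distinct iterator types into one.
        fn iter(&self) -> impl Iterator<Item = u32> + '_ {
            match self {
                Numbers::Few(values) => Either::Left(values.iter().copied()),
                Numbers::UpTo(n) => Either::Right(0..*n),
            }
        }
    }

    fn main() {
        let few = Numbers::Few(vec![7, 8, 9]);
        let up_to = Numbers::UpTo(3);
        assert_eq!(few.iter().collect::<Vec<_>>(), vec![7, 8, 9]);
        assert_eq!(up_to.iter().collect::<Vec<_>>(), vec![0, 1, 2]);
    }

`Either` implements `Iterator` whenever both sides do and their items match, which is what lets the decommissioned and active branches share one signature without boxing.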