diff --git a/Cargo.lock b/Cargo.lock index 951451e9ff..6ed708a89c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -287,7 +287,7 @@ dependencies = [ "bb8", "diesel", "futures", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -534,7 +534,7 @@ dependencies = [ "serde_with", "sha3", "static_assertions", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -687,13 +687,13 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "85b6598a2f5d564fb7855dc6b06fd1c38cff5a72bd8b863a4d021938497b440a" dependencies = [ "serde", - "thiserror", + "thiserror 1.0.69", ] [[package]] name = "bhyve_api" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04#6936f1a949d155da38d3148abd42caef337dea04" +source = "git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739#220a6f367c18f2452dbc4fa9086f3fe73b961739" dependencies = [ "bhyve_api_sys", "libc", @@ -703,7 +703,7 @@ dependencies = [ [[package]] name = "bhyve_api_sys" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04#6936f1a949d155da38d3148abd42caef337dea04" +source = "git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739#220a6f367c18f2452dbc4fa9086f3fe73b961739" dependencies = [ "libc", "strum", @@ -718,7 +718,7 @@ dependencies = [ "bitflags 2.6.0", "cexpr", "clang-sys", - "itertools 0.12.1", + "itertools 0.10.5", "lazy_static", "lazycell", "log", @@ -895,7 +895,7 @@ dependencies = [ "slog", "slog-async", "slog-term", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", "vsss-rs", @@ -1056,7 +1056,7 @@ dependencies = [ "semver 1.0.23", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -1449,7 +1449,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -1668,7 +1668,7 @@ source = "git+https://github.com/oxidecomputer/propolis?rev=fae5334bcad5e8647943 dependencies = [ "serde", "serde_derive", - "thiserror", + "thiserror 1.0.69", "toml 0.7.8", ] @@ -1822,7 +1822,7 @@ dependencies = [ [[package]] name = "crucible-agent-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=2cfc7e0c8572b3bfafbfc838c4e6d658f442d239#2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source = "git+https://github.com/oxidecomputer/crucible?rev=5a41b826171c7d2a8412fa833377ab1df25ee8ec#5a41b826171c7d2a8412fa833377ab1df25ee8ec" dependencies = [ "anyhow", "chrono", @@ -1838,7 +1838,7 @@ dependencies = [ [[package]] name = "crucible-client-types" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=2cfc7e0c8572b3bfafbfc838c4e6d658f442d239#2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source = "git+https://github.com/oxidecomputer/crucible?rev=5a41b826171c7d2a8412fa833377ab1df25ee8ec#5a41b826171c7d2a8412fa833377ab1df25ee8ec" dependencies = [ "base64 0.22.1", "crucible-workspace-hack", @@ -1851,7 +1851,7 @@ dependencies = [ [[package]] name = "crucible-common" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=2cfc7e0c8572b3bfafbfc838c4e6d658f442d239#2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source = "git+https://github.com/oxidecomputer/crucible?rev=5a41b826171c7d2a8412fa833377ab1df25ee8ec#5a41b826171c7d2a8412fa833377ab1df25ee8ec" dependencies = [ "anyhow", "atty", @@ -1869,7 +1869,7 @@ dependencies = [ "slog-dtrace", "slog-term", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-rustls 0.24.1", 
"toml 0.8.19", @@ -1881,7 +1881,7 @@ dependencies = [ [[package]] name = "crucible-pantry-client" version = "0.0.1" -source = "git+https://github.com/oxidecomputer/crucible?rev=2cfc7e0c8572b3bfafbfc838c4e6d658f442d239#2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source = "git+https://github.com/oxidecomputer/crucible?rev=5a41b826171c7d2a8412fa833377ab1df25ee8ec#5a41b826171c7d2a8412fa833377ab1df25ee8ec" dependencies = [ "anyhow", "chrono", @@ -1898,13 +1898,13 @@ dependencies = [ [[package]] name = "crucible-smf" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/crucible?rev=2cfc7e0c8572b3bfafbfc838c4e6d658f442d239#2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source = "git+https://github.com/oxidecomputer/crucible?rev=5a41b826171c7d2a8412fa833377ab1df25ee8ec#5a41b826171c7d2a8412fa833377ab1df25ee8ec" dependencies = [ "crucible-workspace-hack", "libc", "num-derive", "num-traits", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2150,7 +2150,7 @@ version = "0.3.4" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "ff4a5fefe330e8d7f31b16a318f9ce81000d8e35e69b93eae154d16d2278f70f" dependencies = [ - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -2272,7 +2272,7 @@ dependencies = [ "hex", "ipnet", "rand", - "thiserror", + "thiserror 1.0.69", "trust-dns-proto", "url", ] @@ -2302,9 +2302,9 @@ dependencies = [ [[package]] name = "diesel" -version = "2.2.4" +version = "2.2.6" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "158fe8e2e68695bd615d7e4f3227c0727b151330d3e253b525086c348d055d5e" +checksum = "ccf1bedf64cdb9643204a36dd15b19a6ce8e7aa7f7b105868e9f1fad5ffa7d12" dependencies = [ "bitflags 2.6.0", "byteorder", @@ -2321,9 +2321,9 @@ dependencies = [ [[package]] name = "diesel-dtrace" -version = "0.3.0" +version = "0.4.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "5318329cce80f28564e585bb5ba4007bdf16865efa13d797a4f0fd4b1fed40f1" +checksum = "4e5130181059723aae1cfdb678d3698052a225aaadb18000f77fec4200047acc" dependencies = [ "diesel", "serde", @@ -2425,7 +2425,7 @@ dependencies = [ "libdlpi-sys", "num_enum", "pretty-hex 0.2.1", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -2464,7 +2464,7 @@ dependencies = [ "slog-term", "subprocess", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", "uuid", @@ -2514,7 +2514,7 @@ dependencies = [ "pretty-hex 0.4.1", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "zerocopy 0.7.35", ] @@ -2639,7 +2639,7 @@ dependencies = [ "slog-bunyan", "slog-json", "slog-term", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-rustls 0.25.0", "toml 0.8.19", @@ -2700,7 +2700,7 @@ checksum = "71734e3eb68cd4df338d04dffdcc024f89eb0b238150cc95b826fbfad756452b" dependencies = [ "pest", "pest_derive", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -3429,7 +3429,7 @@ dependencies = [ "slog-error-chain", "socket2", "string_cache", - "thiserror", + "thiserror 1.0.69", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc.git?branch=main)", "tokio", "usdt", @@ -3692,9 +3692,9 @@ dependencies = [ [[package]] name = "hashbrown" -version = "0.15.0" +version = "0.15.1" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "1e087f84d4f86bf4b218b927129862374b72199ae7d8657835f1e89000eea4fb" +checksum = "3a9bfc1af68b1726ea47d3d5109de126281def866b33970e10fbab11b5dafab3" dependencies = [ "allocator-api2", "equivalent", @@ -3828,7 +3828,7 @@ dependencies = [ "once_cell", "radix_trie", "rand", - "thiserror", + "thiserror 
1.0.69", "tokio", "tracing", ] @@ -3850,7 +3850,7 @@ dependencies = [ "ipnet", "once_cell", "rand", - "thiserror", + "thiserror 1.0.69", "tinyvec", "tokio", "tracing", @@ -3873,7 +3873,7 @@ dependencies = [ "rand", "resolv-conf", "smallvec 1.13.2", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -3891,7 +3891,7 @@ dependencies = [ "futures-util", "hickory-proto", "serde", - "thiserror", + "thiserror 1.0.69", "time", "tokio", "tokio-util", @@ -4036,7 +4036,7 @@ dependencies = [ "similar", "stringmetrics", "tabwriter", - "thiserror", + "thiserror 1.0.69", "tokio", "url", ] @@ -4097,7 +4097,7 @@ dependencies = [ "object 0.30.4", "path-slash", "rsa", - "thiserror", + "thiserror 1.0.69", "tlvc 0.3.1 (git+https://github.com/oxidecomputer/tlvc)", "tlvc-text", "toml 0.7.8", @@ -4162,7 +4162,7 @@ dependencies = [ "http", "hyper", "hyper-util", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pki-types", "tokio", "tokio-rustls 0.26.0", @@ -4446,7 +4446,7 @@ dependencies = [ [[package]] name = "illumos-sys-hdrs" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98247c27846133a80fdb8f730f0c57e72d766561#98247c27846133a80fdb8f730f0c57e72d766561" +source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e#b56afeeb14e0042cbd7bda85b166ed86ee17820e" [[package]] name = "illumos-utils" @@ -4481,7 +4481,7 @@ dependencies = [ "slog", "slog-error-chain", "smf", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", "uuid", @@ -4524,7 +4524,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "707907fe3c25f5424cce2cb7e1cbcafee6bdbe735ca90ef77c29e84591e5b9da" dependencies = [ "equivalent", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "serde", ] @@ -4650,7 +4650,7 @@ dependencies = [ "slog-term", "smf", "test-strategy", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tufaceous-lib", @@ -4708,7 +4708,7 @@ dependencies = [ "serde_json", "serde_with", "test-strategy", - "thiserror", + "thiserror 1.0.69", "tokio", "update-engine", ] @@ -4764,7 +4764,7 @@ dependencies = [ "sled", "slog", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -4800,7 +4800,7 @@ dependencies = [ "proptest", "serde", "test-strategy", - "thiserror", + "thiserror 1.0.69", "uuid", ] @@ -4934,7 +4934,7 @@ dependencies = [ "secrecy", "sha3", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "zeroize", ] @@ -4942,7 +4942,7 @@ dependencies = [ [[package]] name = "kstat-macro" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98247c27846133a80fdb8f730f0c57e72d766561#98247c27846133a80fdb8f730f0c57e72d766561" +source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e#b56afeeb14e0042cbd7bda85b166ed86ee17820e" dependencies = [ "quote", "syn 2.0.87", @@ -4955,7 +4955,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "27964e4632377753acb0898ce6f28770d50cbca1339200ae63d700cff97b5c2b" dependencies = [ "libc", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5028,7 +5028,7 @@ source = "git+https://github.com/oxidecomputer/libefi-illumos?branch=master#54c3 dependencies = [ "libc", "libefi-sys", - "thiserror", + "thiserror 1.0.69", "uuid", ] @@ -5065,7 +5065,7 @@ dependencies = [ "slog-term", "smf", "tabwriter", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-tungstenite 0.21.0", "toml 0.7.8", @@ -5093,7 +5093,7 @@ source = "git+https://github.com/oxidecomputer/libipcc?rev=fdffa212373a8f92473ea dependencies = [ "cfg-if", 
"libc", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5103,7 +5103,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4979f22fdb869068da03c9f7528f8297c6fd2606bc3a4affe42e6a823fdb8da4" dependencies = [ "cfg-if", - "windows-targets 0.52.6", + "windows-targets 0.48.5", ] [[package]] @@ -5129,7 +5129,7 @@ dependencies = [ "rand", "rusty-doors", "socket2", - "thiserror", + "thiserror 1.0.69", "tracing", "winnow 0.6.20", ] @@ -5151,7 +5151,7 @@ dependencies = [ "rand", "rusty-doors", "socket2", - "thiserror", + "thiserror 1.0.69", "tracing", "winnow 0.6.20", ] @@ -5163,7 +5163,7 @@ source = "git+https://github.com/oxidecomputer/libnvme?rev=dd5bb221d327a1bc92879 dependencies = [ "libnvme-sys", "nvme", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -5328,7 +5328,7 @@ dependencies = [ "serde", "serde-hex", "sha2", - "thiserror", + "thiserror 1.0.69", "x509-cert", "zerocopy 0.6.6", ] @@ -5339,7 +5339,7 @@ version = "0.12.5" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "234cf4f4a04dc1f57e24b96cc0cd600cf2af460d4161ac5ecdd0af8e1f3b2a38" dependencies = [ - "hashbrown 0.15.0", + "hashbrown 0.15.1", ] [[package]] @@ -5685,7 +5685,7 @@ dependencies = [ "serde_urlencoded", "slog", "strum", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -5791,7 +5791,7 @@ dependencies = [ "slog-error-chain", "steno", "strum", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -5870,7 +5870,7 @@ dependencies = [ "subprocess", "swrite", "term", - "thiserror", + "thiserror 1.0.69", "tokio", "url", "usdt", @@ -5949,7 +5949,7 @@ dependencies = [ "sled-agent-client", "slog", "strum", - "thiserror", + "thiserror 1.0.69", "tokio", "typed-rng", "uuid", @@ -5987,7 +5987,7 @@ dependencies = [ "pq-sys", "slog", "slog-error-chain", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -6089,7 +6089,7 @@ dependencies = [ "static_assertions", "strum", "test-strategy", - "thiserror", + "thiserror 1.0.69", "typed-rng", "uuid", ] @@ -6127,7 +6127,7 @@ dependencies = [ "slog", "swrite", "sync-ptr", - "thiserror", + "thiserror 1.0.69", "typed-rng", "uuid", ] @@ -6174,7 +6174,7 @@ dependencies = [ "serde_json", "sled-hardware-types", "strum", - "thiserror", + "thiserror 1.0.69", "uuid", ] @@ -6271,6 +6271,7 @@ dependencies = [ "gateway-client", "http", "humantime", + "illumos-utils", "internal-dns-types", "ipnetwork", "newtype-uuid", @@ -6294,7 +6295,7 @@ dependencies = [ "steno", "strum", "test-strategy", - "thiserror", + "thiserror 1.0.69", "update-engine", "uuid", ] @@ -6593,7 +6594,7 @@ dependencies = [ "openssl", "openssl-sys", "rcgen", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -6629,7 +6630,7 @@ dependencies = [ "slog-error-chain", "slog-term", "subprocess", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-postgres", "toml 0.8.19", @@ -6671,7 +6672,7 @@ dependencies = [ "slog-error-chain", "subprocess", "test-strategy", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-postgres", "toml 0.8.19", @@ -6719,7 +6720,7 @@ dependencies = [ "slog-error-chain", "strum", "test-strategy", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", "uuid", @@ -6738,7 +6739,7 @@ dependencies = [ "serde", "sled-hardware-types", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -6819,7 +6820,7 @@ dependencies = [ "slog-error-chain", "sp-sim", "subprocess", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tokio-tungstenite 0.23.1", @@ -6980,7 +6981,7 @@ dependencies = [ "pq-sys", "pretty_assertions", 
"progenitor-client", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739)", "qorb", "rand", "rcgen", @@ -7011,7 +7012,7 @@ dependencies = [ "subprocess", "tempfile", "term", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-postgres", "tokio-util", @@ -7119,7 +7120,7 @@ dependencies = [ "smf", "strum", "tar", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", "walkdir", @@ -7139,7 +7140,7 @@ dependencies = [ "schemars", "serde", "serde_with", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -7245,7 +7246,7 @@ dependencies = [ "oximeter-producer", "oxnet", "pretty_assertions", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739)", "propolis-mock-server", "propolis_api_types", "rand", @@ -7280,7 +7281,7 @@ dependencies = [ "subprocess", "tar", "tempfile", - "thiserror", + "thiserror 1.0.69", "tofino", "tokio", "tokio-stream", @@ -7327,7 +7328,7 @@ dependencies = [ "subprocess", "tar", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-postgres", "usdt", @@ -7395,7 +7396,7 @@ dependencies = [ "generic-array", "getrandom", "group", - "hashbrown 0.15.0", + "hashbrown 0.15.1", "hex", "hickory-proto", "hmac", @@ -7406,7 +7407,6 @@ dependencies = [ "indicatif", "inout", "itertools 0.10.5", - "itertools 0.12.1", "lalrpop-util", "lazy_static", "libc", @@ -7441,7 +7441,7 @@ dependencies = [ "reqwest", "rsa", "rustix", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-webpki 0.102.8", "schemars", "scopeguard", @@ -7507,7 +7507,7 @@ dependencies = [ "serde_json", "slog", "tar", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.7.8", "topological-sort", @@ -7645,7 +7645,7 @@ dependencies = [ [[package]] name = "opte" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98247c27846133a80fdb8f730f0c57e72d766561#98247c27846133a80fdb8f730f0c57e72d766561" +source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e#b56afeeb14e0042cbd7bda85b166ed86ee17820e" dependencies = [ "bitflags 2.6.0", "cfg-if", @@ -7664,7 +7664,7 @@ dependencies = [ [[package]] name = "opte-api" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98247c27846133a80fdb8f730f0c57e72d766561#98247c27846133a80fdb8f730f0c57e72d766561" +source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e#b56afeeb14e0042cbd7bda85b166ed86ee17820e" dependencies = [ "illumos-sys-hdrs", "ingot", @@ -7677,7 +7677,7 @@ dependencies = [ [[package]] name = "opte-ioctl" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98247c27846133a80fdb8f730f0c57e72d766561#98247c27846133a80fdb8f730f0c57e72d766561" +source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e#b56afeeb14e0042cbd7bda85b166ed86ee17820e" dependencies = [ "libc", "libnet 0.1.0 (git+https://github.com/oxidecomputer/netadm-sys)", @@ -7685,7 +7685,7 @@ dependencies = [ "oxide-vpc", "postcard", "serde", - "thiserror", + "thiserror 2.0.3", ] [[package]] @@ -7699,7 +7699,7 @@ dependencies = [ "maplit", "oso-derive", "polar-core", - "thiserror", + "thiserror 1.0.69", "tracing", ] @@ -7737,7 +7737,7 @@ dependencies = [ "reqwest", 
"serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -7745,7 +7745,7 @@ dependencies = [ [[package]] name = "oxide-vpc" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/opte?rev=98247c27846133a80fdb8f730f0c57e72d766561#98247c27846133a80fdb8f730f0c57e72d766561" +source = "git+https://github.com/oxidecomputer/opte?rev=b56afeeb14e0042cbd7bda85b166ed86ee17820e#b56afeeb14e0042cbd7bda85b166ed86ee17820e" dependencies = [ "cfg-if", "illumos-sys-hdrs", @@ -7844,7 +7844,7 @@ dependencies = [ "slog-term", "strum", "subprocess", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", "uuid", @@ -7907,7 +7907,7 @@ dependencies = [ "strum", "tabled", "tempfile", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-util", "usdt", @@ -7934,7 +7934,7 @@ dependencies = [ "slog", "slog-async", "slog-term", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -7970,7 +7970,7 @@ dependencies = [ "slog", "slog-dtrace", "slog-term", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -8045,7 +8045,7 @@ dependencies = [ "serde", "serde_json", "strum", - "thiserror", + "thiserror 1.0.69", "trybuild", "uuid", ] @@ -8398,7 +8398,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "879952a81a83930934cbf1786752d6dedc3b1f29e8f8fb2ad1d0a36f377cf442" dependencies = [ "memchr", - "thiserror", + "thiserror 1.0.69", "ucd-trie", ] @@ -8934,7 +8934,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.87", - "thiserror", + "thiserror 1.0.69", "typify", "unicode-ident", ] @@ -8960,7 +8960,7 @@ dependencies = [ [[package]] name = "propolis-client" version = "0.1.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04#6936f1a949d155da38d3148abd42caef337dea04" +source = "git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739#220a6f367c18f2452dbc4fa9086f3fe73b961739" dependencies = [ "async-trait", "base64 0.21.7", @@ -8972,7 +8972,7 @@ dependencies = [ "serde", "serde_json", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-tungstenite 0.21.0", "uuid", @@ -8993,7 +8993,7 @@ dependencies = [ "serde", "serde_json", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-tungstenite 0.21.0", "uuid", @@ -9002,7 +9002,7 @@ dependencies = [ [[package]] name = "propolis-mock-server" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04#6936f1a949d155da38d3148abd42caef337dea04" +source = "git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739#220a6f367c18f2452dbc4fa9086f3fe73b961739" dependencies = [ "anyhow", "atty", @@ -9023,7 +9023,7 @@ dependencies = [ "slog-bunyan", "slog-dtrace", "slog-term", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-tungstenite 0.21.0", "uuid", @@ -9037,27 +9037,27 @@ dependencies = [ "cpuid_profile_config", "serde", "serde_derive", - "thiserror", + "thiserror 1.0.69", "toml 0.7.8", ] [[package]] name = "propolis_api_types" version = "0.0.0" -source = "git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04#6936f1a949d155da38d3148abd42caef337dea04" +source = "git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739#220a6f367c18f2452dbc4fa9086f3fe73b961739" dependencies = [ "crucible-client-types", "propolis_types", "schemars", "serde", - "thiserror", + "thiserror 1.0.69", "uuid", ] [[package]] name = "propolis_types" version = "0.0.0" -source = 
"git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04#6936f1a949d155da38d3148abd42caef337dea04" +source = "git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739#220a6f367c18f2452dbc4fa9086f3fe73b961739" dependencies = [ "schemars", "serde", @@ -9116,7 +9116,7 @@ dependencies = [ "schemars", "serde", "serde_json", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tokio-tungstenite 0.24.0", @@ -9151,9 +9151,9 @@ dependencies = [ "quinn-proto", "quinn-udp", "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustls 0.23.19", "socket2", - "thiserror", + "thiserror 1.0.69", "tokio", "tracing", ] @@ -9168,9 +9168,9 @@ dependencies = [ "rand", "ring 0.17.8", "rustc-hash 2.0.0", - "rustls 0.23.14", + "rustls 0.23.19", "slab", - "thiserror", + "thiserror 1.0.69", "tinyvec", "tracing", ] @@ -9296,7 +9296,7 @@ dependencies = [ "hyper", "omicron-workspace-hack", "proptest", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-util", ] @@ -9424,7 +9424,7 @@ checksum = "ba009ff324d1fc1b900bd1fdb31564febe58a8ccc8a6fdbb93b543d33b13ca43" dependencies = [ "getrandom", "libredox", - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -9442,7 +9442,7 @@ dependencies = [ "strip-ansi-escapes", "strum", "strum_macros 0.26.4", - "thiserror", + "thiserror 1.0.69", "unicode-segmentation", "unicode-width 0.1.14", ] @@ -9582,7 +9582,7 @@ dependencies = [ "percent-encoding", "pin-project-lite", "quinn", - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pemfile 2.2.0", "rustls-pki-types", "serde", @@ -9803,7 +9803,7 @@ dependencies = [ "ssh-encoding", "ssh-key", "subtle", - "thiserror", + "thiserror 1.0.69", "tokio", ] @@ -9861,7 +9861,7 @@ dependencies = [ "spki", "ssh-encoding", "ssh-key", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "typenum", @@ -9923,7 +9923,7 @@ checksum = "f1adc9dfed5cc999077978cc7163b9282c5751c8d39827c4ea8c8c220ca5a440" dependencies = [ "serde", "tempfile", - "thiserror", + "thiserror 1.0.69", "toml 0.8.19", "toolchain_find", ] @@ -9969,9 +9969,9 @@ dependencies = [ [[package]] name = "rustls" -version = "0.23.14" +version = "0.23.19" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "415d9944693cb90382053259f89fbb077ea730ad7273047ec63b19bc9b160ba8" +checksum = "934b404430bb06b3fae2cba809eb45a1ab1aecd64491213d7c3301b88393f8d1" dependencies = [ "aws-lc-rs", "log", @@ -10016,9 +10016,9 @@ dependencies = [ [[package]] name = "rustls-pki-types" -version = "1.9.0" +version = "1.10.0" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "0e696e35370c65c9c541198af4543ccd580cf17fc25d8e05c5a242b202488c55" +checksum = "16f1201b3c9a7ee8039bcadc17b7e605e2945b27eee7631788c1bd2b0643674b" [[package]] name = "rustls-webpki" @@ -10147,7 +10147,7 @@ dependencies = [ "quick-xml", "rand", "serde", - "thiserror", + "thiserror 1.0.69", "url", "uuid", ] @@ -10720,7 +10720,7 @@ dependencies = [ "omicron-uuid-kinds", "omicron-workspace-hack", "oxnet", - "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=6936f1a949d155da38d3148abd42caef337dea04)", + "propolis-client 0.1.0 (git+https://github.com/oxidecomputer/propolis?rev=220a6f367c18f2452dbc4fa9086f3fe73b961739)", "rcgen", "schemars", "serde", @@ -10730,7 +10730,7 @@ dependencies = [ "sled-hardware-types", "slog", "strum", - "thiserror", + "thiserror 1.0.69", "toml 0.8.19", "uuid", ] @@ -10741,7 +10741,7 @@ version = "0.1.0" dependencies = [ "futures", "omicron-workspace-hack", - "thiserror", + 
"thiserror 1.0.69", "tokio", ] @@ -10770,7 +10770,7 @@ dependencies = [ "sled-hardware-types", "slog", "slog-error-chain", - "thiserror", + "thiserror 1.0.69", "tofino", "tokio", "uuid", @@ -10814,7 +10814,7 @@ dependencies = [ "serde_json", "sled-hardware", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -10971,7 +10971,7 @@ version = "0.2.3" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "4a491bfc47dffa70a3c267bc379e9de9f4b0a7195e474a94498189b177f8d18c" dependencies = [ - "thiserror", + "thiserror 1.0.69", ] [[package]] @@ -11051,7 +11051,7 @@ dependencies = [ "serde", "slog", "slog-dtrace", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", ] @@ -11095,7 +11095,7 @@ dependencies = [ "ed25519-dalek", "libipcc", "pem-rfc7468", - "rustls 0.23.14", + "rustls 0.23.19", "secrecy", "serde", "sha2", @@ -11103,7 +11103,7 @@ dependencies = [ "slog", "slog-async", "slog-term", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-rustls 0.26.0", "toml 0.8.19", @@ -11222,7 +11222,7 @@ dependencies = [ "serde", "serde_json", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "uuid", ] @@ -11634,7 +11634,16 @@ version = "1.0.69" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "b6aaf5339b578ea85b50e080feb250a3e8ae8cfcdff9a461c9ec2904bc923f52" dependencies = [ - "thiserror-impl", + "thiserror-impl 1.0.69", +] + +[[package]] +name = "thiserror" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "c006c85c7651b3cf2ada4584faa36773bd07bac24acfb39f3c431b36d7e667aa" +dependencies = [ + "thiserror-impl 2.0.3", ] [[package]] @@ -11648,6 +11657,17 @@ dependencies = [ "syn 2.0.87", ] +[[package]] +name = "thiserror-impl" +version = "2.0.3" +source = "registry+https://github.com/rust-lang/crates.io-index" +checksum = "f077553d607adc1caf65430528a576c757a71ed73944b66ebb58ef2bbd243568" +dependencies = [ + "proc-macro2", + "quote", + "syn 2.0.87", +] + [[package]] name = "thiserror-impl-no-std" version = "2.0.2" @@ -11928,7 +11948,7 @@ version = "0.26.0" source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "0c7bc40d0e5a97695bb96e27995cd3a08538541b0a846f65bba7a359f36700d4" dependencies = [ - "rustls 0.23.14", + "rustls 0.23.19", "rustls-pki-types", "tokio", ] @@ -12093,7 +12113,7 @@ dependencies = [ "pem", "percent-encoding", "reqwest", - "rustls 0.23.14", + "rustls 0.23.19", "serde", "serde_json", "serde_plain", @@ -12163,7 +12183,7 @@ dependencies = [ "lazy_static", "rand", "smallvec 1.13.2", - "thiserror", + "thiserror 1.0.69", "tinyvec", "tracing", "url", @@ -12278,7 +12298,7 @@ dependencies = [ "log", "rand", "sha1", - "thiserror", + "thiserror 1.0.69", "url", "utf-8", ] @@ -12297,7 +12317,7 @@ dependencies = [ "log", "rand", "sha1", - "thiserror", + "thiserror 1.0.69", "utf-8", ] @@ -12315,7 +12335,7 @@ dependencies = [ "log", "rand", "sha1", - "thiserror", + "thiserror 1.0.69", "utf-8", ] @@ -12380,7 +12400,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.87", - "thiserror", + "thiserror 1.0.69", "unicode-ident", ] @@ -12546,7 +12566,7 @@ dependencies = [ "rand", "sha2", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-util", "tough", @@ -12649,7 +12669,7 @@ dependencies = [ "serde", "serde_json", "syn 2.0.87", - "thiserror", + "thiserror 1.0.69", "thread-id", "version_check", ] @@ -13035,7 +13055,7 @@ dependencies = [ "sha2", "sled-hardware-types", "slog", - "thiserror", + "thiserror 1.0.69", "tokio", "toml 0.8.19", "update-engine", @@ 
-13125,7 +13145,7 @@ dependencies = [ "slog-dtrace", "subprocess", "tar", - "thiserror", + "thiserror 1.0.69", "tokio", "tokio-stream", "tokio-util", @@ -13729,7 +13749,7 @@ dependencies = [ "flate2", "indexmap 2.6.0", "memchr", - "thiserror", + "thiserror 1.0.69", "zopfli", ] @@ -13740,7 +13760,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "3596bbc963cd9dbaa69b02e349af4d061c56c41d211ba64150a2cedb2f722707" dependencies = [ "itertools 0.10.5", - "thiserror", + "thiserror 1.0.69", "zone_cfg_derive 0.1.2", ] @@ -13751,7 +13771,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index" checksum = "a62a428a79ea2224ce8ab05d6d8a21bdd7b4b68a8dbc1230511677a56e72ef22" dependencies = [ "itertools 0.10.5", - "thiserror", + "thiserror 1.0.69", "tokio", "zone_cfg_derive 0.3.0", ] diff --git a/Cargo.toml b/Cargo.toml index c9fc5e300d..c31a323093 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -348,10 +348,10 @@ cookie = "0.18" criterion = { version = "0.5.1", features = [ "async_tokio" ] } crossbeam = "0.8" crossterm = { version = "0.28.1", features = ["event-stream"] } -crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" } -crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" } -crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" } -crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" } +crucible-agent-client = { git = "https://github.com/oxidecomputer/crucible", rev = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" } +crucible-pantry-client = { git = "https://github.com/oxidecomputer/crucible", rev = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" } +crucible-smf = { git = "https://github.com/oxidecomputer/crucible", rev = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" } +crucible-common = { git = "https://github.com/oxidecomputer/crucible", rev = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" } csv = "1.3.0" curve25519-dalek = "4" datatest-stable = "0.2.9" @@ -363,7 +363,7 @@ derive_more = "0.99.18" derive-where = "1.2.7" # Having the i-implement-... feature here makes diesel go away from the workspace-hack diesel = { version = "2.2.4", features = ["i-implement-a-third-party-backend-and-opt-into-breaking-changes", "postgres", "r2d2", "chrono", "serde_json", "network-address", "uuid"] } -diesel-dtrace = "0.3.0" +diesel-dtrace = "0.4.0" dns-server = { path = "dns-server" } dns-server-api = { path = "dns-server-api" } dns-service-client = { path = "clients/dns-service-client" } @@ -494,7 +494,7 @@ omicron-test-utils = { path = "test-utils" } omicron-workspace-hack = "0.1.0" omicron-zone-package = "0.11.1" oxide-client = { path = "clients/oxide-client" } -oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "98247c27846133a80fdb8f730f0c57e72d766561", features = [ "api", "std" ] } +oxide-vpc = { git = "https://github.com/oxidecomputer/opte", rev = "b56afeeb14e0042cbd7bda85b166ed86ee17820e", features = [ "api", "std" ] } oxlog = { path = "dev-tools/oxlog" } oxnet = { git = "https://github.com/oxidecomputer/oxnet" } once_cell = "1.20.2" @@ -504,7 +504,7 @@ openapiv3 = "2.0.0" # must match samael's crate! 
openssl = "0.10" openssl-sys = "0.9" -opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "98247c27846133a80fdb8f730f0c57e72d766561" } +opte-ioctl = { git = "https://github.com/oxidecomputer/opte", rev = "b56afeeb14e0042cbd7bda85b166ed86ee17820e" } oso = "0.27" owo-colors = "4.1.0" oximeter = { path = "oximeter/oximeter" } @@ -541,10 +541,10 @@ prettyplease = { version = "0.2.25", features = ["verbatim"] } proc-macro2 = "1.0" progenitor = "0.8.0" progenitor-client = "0.8.0" -bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "6936f1a949d155da38d3148abd42caef337dea04" } -propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "6936f1a949d155da38d3148abd42caef337dea04" } -propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "6936f1a949d155da38d3148abd42caef337dea04" } -propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "6936f1a949d155da38d3148abd42caef337dea04" } +bhyve_api = { git = "https://github.com/oxidecomputer/propolis", rev = "220a6f367c18f2452dbc4fa9086f3fe73b961739" } +propolis_api_types = { git = "https://github.com/oxidecomputer/propolis", rev = "220a6f367c18f2452dbc4fa9086f3fe73b961739" } +propolis-client = { git = "https://github.com/oxidecomputer/propolis", rev = "220a6f367c18f2452dbc4fa9086f3fe73b961739" } +propolis-mock-server = { git = "https://github.com/oxidecomputer/propolis", rev = "220a6f367c18f2452dbc4fa9086f3fe73b961739" } proptest = "1.5.0" qorb = "0.2.1" quote = "1.0" diff --git a/clippy.toml b/clippy.toml index ffa3ffac70..31e28d5911 100644 --- a/clippy.toml +++ b/clippy.toml @@ -10,4 +10,10 @@ disallowed-methods = [ # `IncompleteOnConflictExt::as_partial_index` in `nexus-db-queries`. # See the documentation of that method for more. "diesel::upsert::DecoratableTarget::filter_target", + + # This form of transaction is susceptible to serialization failures, + # and can fail spuriously. + # Instead, the "transaction_retry_wrapper" should be preferred, as it + # automatically retries transactions experiencing contention. + { path = "async_bb8_diesel::AsyncConnection::transaction_async", reason = "Prefer to use transaction_retry_wrapper, if possible. Feel free to override this for tests and nested transactions." }, ] diff --git a/common/src/api/internal/shared.rs b/common/src/api/internal/shared.rs index e0d6452376..94440df2d5 100644 --- a/common/src/api/internal/shared.rs +++ b/common/src/api/internal/shared.rs @@ -872,6 +872,7 @@ pub struct ExternalIpGatewayMap { /// Describes the purpose of the dataset. 
#[derive(Debug, Clone, PartialEq, Eq, Ord, PartialOrd, Hash, EnumCount)] +#[cfg_attr(feature = "testing", derive(test_strategy::Arbitrary))] pub enum DatasetKind { // Durable datasets for zones Cockroach, diff --git a/common/src/disk.rs b/common/src/disk.rs index 3500d4dabb..99c2b2db7b 100644 --- a/common/src/disk.rs +++ b/common/src/disk.rs @@ -103,6 +103,10 @@ impl DatasetName { Self { pool_name, kind } } + pub fn into_parts(self) -> (ZpoolName, DatasetKind) { + (self.pool_name, self.kind) + } + pub fn pool(&self) -> &ZpoolName { &self.pool_name } diff --git a/dev-tools/clickhouse-cluster-dev/src/main.rs b/dev-tools/clickhouse-cluster-dev/src/main.rs index 2f85c53ab6..1b78144e4c 100644 --- a/dev-tools/clickhouse-cluster-dev/src/main.rs +++ b/dev-tools/clickhouse-cluster-dev/src/main.rs @@ -22,6 +22,16 @@ use std::time::Duration; async fn main() -> Result<()> { let request_timeout = Duration::from_secs(15); let (logctx, path) = default_clickhouse_log_ctx_and_path(); + + if path.exists() { + let deployment = + default_clickhouse_cluster_test_deployment(path.clone()); + slog::info!(logctx.log, "Stopping test clickhouse nodes"); + deployment.teardown()?; + slog::info!(logctx.log, "Removing previous temporary test directory"); + std::fs::remove_dir_all(&path)?; + } + std::fs::create_dir(&path)?; slog::info!(logctx.log, "Setting up a ClickHouse cluster"); diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 4cccc3c23e..667a666375 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -14,6 +14,8 @@ // NOTE: emanates from Tabled macros #![allow(clippy::useless_vec)] +// NOTE: allowing "transaction_async" without retry +#![allow(clippy::disallowed_methods)] use crate::check_allow_destructive::DestructiveOperationToken; use crate::helpers::const_max_len; diff --git a/dev-tools/omdb/src/bin/omdb/oximeter.rs b/dev-tools/omdb/src/bin/omdb/oximeter.rs index cc1efd126f..7dae63e947 100644 --- a/dev-tools/omdb/src/bin/omdb/oximeter.rs +++ b/dev-tools/omdb/src/bin/omdb/oximeter.rs @@ -7,11 +7,15 @@ use crate::helpers::CONNECTION_OPTIONS_HEADING; use crate::Omdb; use anyhow::Context; +use chrono::SecondsFormat; use clap::Args; use clap::Subcommand; use futures::TryStreamExt; use internal_dns_types::names::ServiceName; +use oximeter_client::types::FailedCollection; +use oximeter_client::types::ProducerDetails; use oximeter_client::types::ProducerEndpoint; +use oximeter_client::types::SuccessfulCollection; use oximeter_client::Client; use slog::Logger; use std::net::SocketAddr; @@ -41,6 +45,11 @@ pub struct OximeterArgs { enum OximeterCommands { /// List the producers the collector is assigned to poll. ListProducers, + /// Fetch details about a single assigned producer. + ProducerDetails { + /// The ID of the producer to fetch. + producer_id: Uuid, + }, } impl OximeterArgs { @@ -81,9 +90,26 @@ impl OximeterArgs { OximeterCommands::ListProducers => { self.list_producers(client).await } + OximeterCommands::ProducerDetails { producer_id } => { + self.producer_details(client, producer_id).await + } } } + async fn producer_details( + &self, + client: Client, + producer_id: Uuid, + ) -> anyhow::Result<()> { + let details = client + .producer_details(&producer_id) + .await + .context("failed to fetch producer details")? 
+ .into_inner(); + print_producer_details(details); + Ok(()) + } + async fn list_producers(&self, client: Client) -> anyhow::Result<()> { let info = client .collector_info() @@ -120,11 +146,168 @@ struct Producer { impl From for Producer { fn from(p: ProducerEndpoint) -> Self { - let interval = Duration::new(p.interval.secs, p.interval.nanos); Self { id: p.id, address: p.address.parse().unwrap(), - interval: humantime::format_duration(interval).to_string(), + interval: duration_to_humantime(&p.interval), + } + } +} + +fn duration_to_humantime(d: &oximeter_client::types::Duration) -> String { + let interval = Duration::new(d.secs, d.nanos); + humantime::format_duration(interval).to_string() +} + +const WIDTH: usize = 12; + +fn print_producer_details(details: ProducerDetails) { + println!(); + println!("{:>WIDTH$}: {}", "ID", details.id); + println!("{:>WIDTH$}: {}", "Address", details.address); + println!( + "{:>WIDTH$}: {}", + "Registered", + details.registered.to_rfc3339_opts(SecondsFormat::Millis, true) + ); + println!( + "{:>WIDTH$}: {}", + "Updated", + details.updated.to_rfc3339_opts(SecondsFormat::Millis, true) + ); + println!( + "{:>WIDTH$}: {}", + "Interval", + duration_to_humantime(&details.interval) + ); + println!("{:>WIDTH$}: {}", "Successes", details.n_collections); + println!("{:>WIDTH$}: {}", "Failures", details.n_failures); + println!(); + print_last_success(details.last_success.as_ref()); + println!(); + print_last_failure(details.last_failure.as_ref()); +} + +fn print_last_success(maybe_success: Option<&SuccessfulCollection>) { + print!("{:>WIDTH$}: ", "Last success"); + match maybe_success { + None => println!("None"), + Some(success) => { + println!(); + println!( + "{:>WIDTH$}: {}", + "Started at", + success.started_at.to_rfc3339_opts(SecondsFormat::Millis, true) + ); + println!( + "{:>WIDTH$}: {:?}", + "Queued for", + Duration::new( + success.time_queued.secs, + success.time_queued.nanos + ) + ); + println!( + "{:>WIDTH$}: {:?}", + "Duration", + Duration::new( + success.time_collecting.secs, + success.time_collecting.nanos + ) + ); + println!("{:>WIDTH$}: {}", "Samples", success.n_samples); } } } + +fn print_last_failure(maybe_failure: Option<&FailedCollection>) { + print!("{:>WIDTH$}: ", "Last failure"); + match maybe_failure { + None => println!("None"), + Some(failure) => { + println!(); + println!( + "{:>WIDTH$}: {}", + "Started at", + failure.started_at.to_rfc3339_opts(SecondsFormat::Millis, true) + ); + println!( + "{:>WIDTH$}: {:?}", + "Queued for", + Duration::new( + failure.time_queued.secs, + failure.time_queued.nanos + ) + ); + println!( + "{:>WIDTH$}: {:?}", + "Duration", + Duration::new( + failure.time_collecting.secs, + failure.time_collecting.nanos + ) + ); + println!("{:>WIDTH$}: {}", "Reason", failure.reason); + } + } +} + +#[cfg(test)] +mod tests { + use super::print_producer_details; + use chrono::Utc; + use oximeter_client::types::FailedCollection; + use oximeter_client::types::ProducerDetails; + use oximeter_client::types::SuccessfulCollection; + use std::time::Duration; + use uuid::Uuid; + + #[test] + fn test_print_producer_details_success_only() { + let now = Utc::now(); + let details = ProducerDetails { + id: Uuid::new_v4(), + address: "[::1]:12345".parse().unwrap(), + interval: Duration::from_secs(10).into(), + last_success: Some(SuccessfulCollection { + n_samples: 100, + started_at: now, + time_collecting: Duration::from_millis(100).into(), + time_queued: Duration::from_millis(10).into(), + }), + last_failure: None, + n_collections: 1, + 
n_failures: 0, + registered: now, + updated: now, + }; + print_producer_details(details); + } + + #[test] + fn test_print_producer_details_with_failure() { + let now = Utc::now(); + let details = ProducerDetails { + id: Uuid::new_v4(), + interval: Duration::from_secs(10).into(), + address: "[::1]:12345".parse().unwrap(), + last_success: Some(SuccessfulCollection { + n_samples: 100, + started_at: now, + time_collecting: Duration::from_millis(100).into(), + time_queued: Duration::from_millis(10).into(), + }), + last_failure: Some(FailedCollection { + started_at: now, + time_collecting: Duration::from_millis(100).into(), + time_queued: Duration::from_millis(10).into(), + reason: String::from("unreachable"), + }), + n_collections: 1, + n_failures: 1, + registered: now, + updated: now, + }; + print_producer_details(details); + } +} diff --git a/dev-tools/omdb/tests/usage_errors.out b/dev-tools/omdb/tests/usage_errors.out index 5e66467403..85fc761289 100644 --- a/dev-tools/omdb/tests/usage_errors.out +++ b/dev-tools/omdb/tests/usage_errors.out @@ -761,8 +761,9 @@ Query oximeter collector state Usage: omdb oximeter [OPTIONS] Commands: - list-producers List the producers the collector is assigned to poll - help Print this message or the help of the given subcommand(s) + list-producers List the producers the collector is assigned to poll + producer-details Fetch details about a single assigned producer + help Print this message or the help of the given subcommand(s) Options: --log-level log level filter [env: LOG_LEVEL=] [default: warn] diff --git a/illumos-utils/src/zfs.rs b/illumos-utils/src/zfs.rs index fa09fb22c5..f9edb8de86 100644 --- a/illumos-utils/src/zfs.rs +++ b/illumos-utils/src/zfs.rs @@ -5,14 +5,16 @@ //! Utilities for poking at ZFS. use crate::{execute, PFEXEC}; +use anyhow::anyhow; +use anyhow::bail; use anyhow::Context; use camino::{Utf8Path, Utf8PathBuf}; use omicron_common::api::external::ByteCount; use omicron_common::disk::CompressionAlgorithm; use omicron_common::disk::DiskIdentity; use omicron_uuid_kinds::DatasetUuid; +use std::collections::BTreeMap; use std::fmt; -use std::str::FromStr; // These locations in the ramdisk must only be used by the switch zone. // @@ -236,56 +238,118 @@ pub struct DatasetProperties { } impl DatasetProperties { - // care about. - const ZFS_LIST_STR: &'static str = + const ZFS_GET_PROPS: &'static str = "oxide:uuid,name,avail,used,quota,reservation,compression"; } -// An inner parsing function, so that the FromStr implementation can always emit -// the string 's' that failed to parse in the error message. -fn dataset_properties_parse( - s: &str, -) -> Result { - let mut iter = s.split_whitespace(); - - let id = match iter.next().context("Missing UUID")? { - "-" => None, - anything_else => Some(anything_else.parse::()?), - }; - - let name = iter.next().context("Missing 'name'")?.to_string(); - let avail = - iter.next().context("Missing 'avail'")?.parse::()?.try_into()?; - let used = - iter.next().context("Missing 'used'")?.parse::()?.try_into()?; - let quota = match iter.next().context("Missing 'quota'")?.parse::()? { - 0 => None, - q => Some(q.try_into()?), - }; - let reservation = - match iter.next().context("Missing 'reservation'")?.parse::()? 
{ - 0 => None, - r => Some(r.try_into()?), - }; - let compression = iter.next().context("Missing 'compression'")?.to_string(); - - Ok(DatasetProperties { - id, - name, - avail, - used, - quota, - reservation, - compression, - }) -} +impl DatasetProperties { + /// Parses dataset properties, assuming that the caller is providing the + /// output of the following command as stdout: + /// + /// zfs get -rpo name,property,value,source $ZFS_GET_PROPS $DATASETS + fn parse_many( + stdout: &str, + ) -> Result, anyhow::Error> { + let name_prop_val_source_list = stdout.trim().split('\n'); + + let mut datasets: BTreeMap<&str, BTreeMap<&str, _>> = BTreeMap::new(); + for name_prop_val_source in name_prop_val_source_list { + // "-H" indicates that these columns are tab-separated; + // each column may internally have whitespace. + let mut iter = name_prop_val_source.split('\t'); + + let (name, prop, val, source) = ( + iter.next().context("Missing 'name'")?, + iter.next().context("Missing 'property'")?, + iter.next().context("Missing 'value'")?, + iter.next().context("Missing 'source'")?, + ); + if let Some(extra) = iter.next() { + bail!("Unexpected column data: '{extra}'"); + } -impl FromStr for DatasetProperties { - type Err = anyhow::Error; + let props = datasets.entry(name).or_default(); + props.insert(prop, (val, source)); + } - fn from_str(s: &str) -> Result { - dataset_properties_parse(s) - .with_context(|| format!("Failed to parse: {s}")) + datasets + .into_iter() + .map(|(dataset_name, props)| { + let id = props + .get("oxide:uuid") + .filter(|(prop, source)| { + // Dataset UUIDs are properties that are optionally attached to + // datasets. However, some datasets are nested - to avoid them + // from propagating, we explicitly ignore this value if it is + // inherited. + // + // This can be the case for the "zone" filesystem root, which + // can propagate this property to a child zone without it set. + !source.starts_with("inherited") && *prop != "-" + }) + .map(|(prop, _source)| { + prop.parse::() + .context("Failed to parse UUID") + }) + .transpose()?; + let name = dataset_name.to_string(); + let avail = props + .get("available") + .map(|(prop, _source)| prop) + .ok_or(anyhow!("Missing 'available'"))? + .parse::() + .context("Failed to parse 'available'")? + .try_into()?; + let used = props + .get("used") + .map(|(prop, _source)| prop) + .ok_or(anyhow!("Missing 'used'"))? + .parse::() + .context("Failed to parse 'used'")? + .try_into()?; + let quota = props + .get("quota") + .filter(|(_prop, source)| { + // If a quota has not been set explicitly, it has a default + // source and a value of "zero". Rather than parsing the value + // as zero, it should be ignored. + *source != "default" + }) + .map(|(prop, _source)| { + prop.parse::().context("Failed to parse 'quota'") + }) + .transpose()? + .and_then(|v| ByteCount::try_from(v).ok()); + let reservation = props + .get("reservation") + .filter(|(_prop, source)| { + // If a reservation has not been set explicitly, it has a default + // source and a value of "zero". Rather than parsing the value + // as zero, it should be ignored. + *source != "default" + }) + .map(|(prop, _source)| { + prop.parse::() + .context("Failed to parse 'reservation'") + }) + .transpose()? 
+ .and_then(|v| ByteCount::try_from(v).ok()); + let compression = props + .get("compression") + .map(|(prop, _source)| prop.to_string()) + .ok_or_else(|| anyhow!("Missing 'compression'"))?; + + Ok(DatasetProperties { + id, + name, + avail, + used, + quota, + reservation, + compression, + }) + }) + .collect::, _>>() } } @@ -335,6 +399,7 @@ impl Zfs { } /// Get information about datasets within a list of zpools / datasets. + /// Returns properties for all input datasets and their direct children. /// /// This function is similar to [Zfs::list_datasets], but provides a more /// substantial results about the datasets found. @@ -344,26 +409,24 @@ impl Zfs { datasets: &[String], ) -> Result, anyhow::Error> { let mut command = std::process::Command::new(ZFS); - let cmd = command.args(&["list", "-d", "1", "-rHpo"]); + let cmd = command.args(&[ + "get", + "-d", + "1", + "-Hpo", + "name,property,value,source", + ]); // Note: this is tightly coupled with the layout of DatasetProperties - cmd.arg(DatasetProperties::ZFS_LIST_STR); + cmd.arg(DatasetProperties::ZFS_GET_PROPS); cmd.args(datasets); let output = execute(cmd).with_context(|| { format!("Failed to get dataset properties for {datasets:?}") })?; let stdout = String::from_utf8(output.stdout)?; - let mut datasets = stdout - .trim() - .split('\n') - .map(|row| row.parse::()) - .collect::, _>>()?; - - datasets.sort_by(|d1, d2| d1.name.partial_cmp(&d2.name).unwrap()); - datasets.dedup_by(|d1, d2| d1.name.eq(&d2.name)); - Ok(datasets) + DatasetProperties::parse_many(&stdout) } /// Return the name of a dataset for a ZFS object. @@ -859,42 +922,68 @@ mod test { #[test] fn parse_dataset_props() { - let input = - "- dataset_name 1234 5678 0 0 off"; - let props = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tname\tI_AM_IGNORED\t-\n\ + dataset_name\tcompression\toff\tinherited from parent"; + let props = DatasetProperties::parse_many(&input) .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + + assert_eq!(props[0].id, None); + assert_eq!(props[0].name, "dataset_name"); + assert_eq!(props[0].avail.to_bytes(), 1234); + assert_eq!(props[0].used.to_bytes(), 5678); + assert_eq!(props[0].quota, None); + assert_eq!(props[0].reservation, None); + assert_eq!(props[0].compression, "off"); + } - assert_eq!(props.id, None); - assert_eq!(props.name, "dataset_name"); - assert_eq!(props.avail.to_bytes(), 1234); - assert_eq!(props.used.to_bytes(), 5678); - assert_eq!(props.quota, None); - assert_eq!(props.reservation, None); - assert_eq!(props.compression, "off"); + #[test] + fn parse_dataset_too_many_columns() { + let input = "dataset_name\tavailable\t1234\t-\tEXTRA\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tname\tI_AM_IGNORED\t-\n\ + dataset_name\tcompression\toff\tinherited from parent"; + let err = DatasetProperties::parse_many(&input) + .expect_err("Should have parsed data"); + assert!( + err.to_string().contains("Unexpected column data: 'EXTRA'"), + "{err}" + ); } #[test] fn parse_dataset_props_with_optionals() { - let input = "d4e1e554-7b98-4413-809e-4a42561c3d0c dataset_name 1234 5678 111 222 off"; - let props = DatasetProperties::from_str(&input) + let input = + "dataset_name\toxide:uuid\td4e1e554-7b98-4413-809e-4a42561c3d0c\tlocal\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-\n\ + dataset_name\tcompression\toff\tinherited from parent"; + let props = 
DatasetProperties::parse_many(&input) .expect("Should have parsed data"); - + assert_eq!(props.len(), 1); assert_eq!( - props.id, + props[0].id, Some("d4e1e554-7b98-4413-809e-4a42561c3d0c".parse().unwrap()) ); - assert_eq!(props.name, "dataset_name"); - assert_eq!(props.avail.to_bytes(), 1234); - assert_eq!(props.used.to_bytes(), 5678); - assert_eq!(props.quota.map(|q| q.to_bytes()), Some(111)); - assert_eq!(props.reservation.map(|r| r.to_bytes()), Some(222)); - assert_eq!(props.compression, "off"); + assert_eq!(props[0].name, "dataset_name"); + assert_eq!(props[0].avail.to_bytes(), 1234); + assert_eq!(props[0].used.to_bytes(), 5678); + assert_eq!(props[0].quota.map(|q| q.to_bytes()), Some(111)); + assert_eq!(props[0].reservation.map(|r| r.to_bytes()), Some(222)); + assert_eq!(props[0].compression, "off"); } #[test] fn parse_dataset_bad_uuid() { - let input = "bad dataset_name 1234 5678 111 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\toxide:uuid\tbad\t-\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-"; + + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("error parsing UUID (dataset)"), @@ -904,8 +993,9 @@ mod test { #[test] fn parse_dataset_bad_avail() { - let input = "- dataset_name BADAVAIL 5678 111 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\tBADAVAIL\t-\n\ + dataset_name\tused\t5678\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -915,8 +1005,9 @@ mod test { #[test] fn parse_dataset_bad_usage() { - let input = "- dataset_name 1234 BADUSAGE 111 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\tBADUSAGE\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -926,8 +1017,10 @@ mod test { #[test] fn parse_dataset_bad_quota() { - let input = "- dataset_name 1234 5678 BADQUOTA 222 off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\tBADQUOTA\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -937,8 +1030,11 @@ mod test { #[test] fn parse_dataset_bad_reservation() { - let input = "- dataset_name 1234 5678 111 BADRES off"; - let err = DatasetProperties::from_str(&input) + let input = "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\tBADRES\t-"; + let err = DatasetProperties::parse_many(&input) .expect_err("Should have failed to parse"); assert!( format!("{err:#}").contains("invalid digit found in string"), @@ -949,24 +1045,102 @@ mod test { #[test] fn parse_dataset_missing_fields() { let expect_missing = |input: &str, what: &str| { - let err = DatasetProperties::from_str(input) + let err = DatasetProperties::parse_many(input) .expect_err("Should have failed to parse"); let err = format!("{err:#}"); assert!(err.contains(&format!("Missing {what}")), "{err}"); }; expect_missing( - "- dataset_name 1234 5678 111 222", - "'compression'", + "dataset_name\tused\t5678\t-\n\ + 
dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-\n\ + dataset_name\tcompression\toff\tinherited", + "'available'", + ); + expect_missing( + "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-\n\ + dataset_name\tcompression\toff\tinherited", + "'used'", ); expect_missing( - "- dataset_name 1234 5678 111", - "'reservation'", + "dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tquota\t111\t-\n\ + dataset_name\treservation\t222\t-", + "'compression'", ); - expect_missing("- dataset_name 1234 5678", "'quota'"); - expect_missing("- dataset_name 1234", "'used'"); - expect_missing("- dataset_name", "'avail'"); - expect_missing("-", "'name'"); - expect_missing("", "UUID"); + } + + #[test] + fn parse_dataset_uuid_ignored_if_inherited() { + let input = + "dataset_name\toxide:uuid\tb8698ede-60c2-4e16-b792-d28c165cfd12\tinherited from parent\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].id, None); + } + + #[test] + fn parse_dataset_uuid_ignored_if_dash() { + let input = "dataset_name\toxide:uuid\t-\t-\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].id, None); + } + + #[test] + fn parse_quota_ignored_if_default() { + let input = "dataset_name\tquota\t0\tdefault\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].quota, None); + } + + #[test] + fn parse_reservation_ignored_if_default() { + let input = "dataset_name\treservation\t0\tdefault\n\ + dataset_name\tavailable\t1234\t-\n\ + dataset_name\tused\t5678\t-\n\ + dataset_name\tcompression\toff\t-"; + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 1); + assert_eq!(props[0].reservation, None); + } + + #[test] + fn parse_sorts_and_dedups() { + let input = "foo\tavailable\t111\t-\n\ + foo\tused\t111\t-\n\ + foo\tcompression\toff\t-\n\ + foo\tavailable\t111\t-\n\ + foo\tused\t111\t-\n\ + foo\tcompression\toff\t-\n\ + bar\tavailable\t222\t-\n\ + bar\tused\t222\t-\n\ + bar\tcompression\toff\t-"; + + let props = DatasetProperties::parse_many(&input) + .expect("Should have parsed data"); + assert_eq!(props.len(), 2); + assert_eq!(props[0].name, "bar"); + assert_eq!(props[0].used, 222.into()); + assert_eq!(props[1].name, "foo"); + assert_eq!(props[1].used, 111.into()); } } diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 380a5c1b00..0c73ae1ae2 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -335,6 +335,11 @@ impl DataStore { // batch rather than making a bunch of round-trips to the database. // We'd do that if we had an interface for doing that with bound // parameters, etc. See oxidecomputer/omicron#973. 
+ + // The risk of a serialization error is possible here, but low, + // as most of the operations should be insertions rather than in-place + // modifications of existing tables. + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { // Insert the row for the blueprint. { @@ -1087,6 +1092,7 @@ impl DataStore { // start removing it and we'd also need to make sure we didn't leak a // collection if we crash while deleting it. let conn = self.pool_connection_authorized(opctx).await?; + let err = OptionalError::new(); let ( nblueprints, @@ -1101,19 +1107,23 @@ impl DataStore { nclickhouse_cluster_configs, nclickhouse_keepers, nclickhouse_servers, - ) = conn - .transaction_async(|conn| async move { + ) = self.transaction_retry_wrapper("blueprint_delete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { // Ensure that blueprint we're about to delete is not the // current target. - let current_target = - Self::blueprint_current_target_only(&conn).await?; + let current_target = Self::blueprint_current_target_only(&conn) + .await + .map_err(|txn_err| txn_err.into_diesel(&err))?; + if current_target.target_id == blueprint_id { - return Err(TransactionError::CustomError( + return Err(err.bail(TransactionError::CustomError( Error::conflict(format!( "blueprint {blueprint_id} is the \ current target and cannot be deleted", )), - )); + ))); } // Remove the record describing the blueprint itself. @@ -1130,9 +1140,9 @@ impl DataStore { // references to it in any of the remaining tables either, since // deletion always goes through this transaction. if nblueprints == 0 { - return Err(TransactionError::CustomError( + return Err(err.bail(TransactionError::CustomError( authz_blueprint.not_found(), - )); + ))); } // Remove rows associated with sled states. @@ -1259,13 +1269,12 @@ impl DataStore { nclickhouse_keepers, nclickhouse_servers, )) + } }) .await - .map_err(|error| match error { - TransactionError::CustomError(e) => e, - TransactionError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), })?; info!(&opctx.log, "removed blueprint"; diff --git a/nexus/db-queries/src/db/datastore/dns.rs b/nexus/db-queries/src/db/datastore/dns.rs index a691ce43aa..3f0f7828fa 100644 --- a/nexus/db-queries/src/db/datastore/dns.rs +++ b/nexus/db-queries/src/db/datastore/dns.rs @@ -19,6 +19,7 @@ use crate::db::pagination::paginated; use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::TransactionError; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; @@ -363,40 +364,49 @@ impl DataStore { ) -> Result<(), Error> { opctx.authorize(authz::Action::Modify, &authz::DNS_CONFIG).await?; let conn = self.pool_connection_authorized(opctx).await?; - conn.transaction_async(|c| async move { - let zones = self - .dns_zones_list_all_on_connection(opctx, &c, update.dns_group) - .await?; - // This looks like a time-of-check-to-time-of-use race, but this - // approach works because we're inside a transaction and the - // isolation level is SERIALIZABLE. 
- let version = self - .dns_group_latest_version_conn(opctx, &c, update.dns_group) - .await?; - if version.version != old_version { - return Err(TransactionError::CustomError(Error::conflict( - format!( - "expected current DNS version to be {}, found {}", - *old_version, *version.version, - ), - ))); - } - self.dns_write_version_internal( - &c, - update, - zones, - Generation(old_version.next()), - ) + let err = OptionalError::new(); + + self.transaction_retry_wrapper("dns_update_from_version") + .transaction(&conn, |c| { + let err = err.clone(); + let update = update.clone(); + async move { + let zones = self + .dns_zones_list_all_on_connection(opctx, &c, update.dns_group) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + // This looks like a time-of-check-to-time-of-use race, but this + // approach works because we're inside a transaction and the + // isolation level is SERIALIZABLE. + let version = self + .dns_group_latest_version_conn(opctx, &c, update.dns_group) + .await + .map_err(|txn_error| txn_error.into_diesel(&err))?; + if version.version != old_version { + return Err(err.bail(TransactionError::CustomError(Error::conflict( + format!( + "expected current DNS version to be {}, found {}", + *old_version, *version.version, + ), + )))); + } + + self.dns_write_version_internal( + &c, + update, + zones, + Generation(old_version.next()), + ) + .await + .map_err(|txn_error| txn_error.into_diesel(&err)) + } + }) .await - }) - .await - .map_err(|e| match e { - TransactionError::CustomError(e) => e, - TransactionError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - }) + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) } /// Update the configuration of a DNS zone as specified in `update` @@ -441,6 +451,9 @@ impl DataStore { .dns_zones_list_all_on_connection(opctx, conn, update.dns_group) .await?; + // This method is used in nested transactions, which are not supported + // with retryable transactions. + #[allow(clippy::disallowed_methods)] conn.transaction_async(|c| async move { let version = self .dns_group_latest_version_conn(opctx, conn, update.dns_group) @@ -1724,6 +1737,8 @@ mod test { let cds = datastore.clone(); let copctx = opctx.child(std::collections::BTreeMap::new()); + + #[allow(clippy::disallowed_methods)] let mut fut = conn1 .transaction_async(|c1| async move { cds.dns_update_incremental(&copctx, &c1, update1) diff --git a/nexus/db-queries/src/db/datastore/inventory.rs b/nexus/db-queries/src/db/datastore/inventory.rs index a6e2a6cf2a..9269b233f3 100644 --- a/nexus/db-queries/src/db/datastore/inventory.rs +++ b/nexus/db-queries/src/db/datastore/inventory.rs @@ -11,7 +11,6 @@ use crate::db::error::public_error_from_diesel_lookup; use crate::db::error::ErrorHandler; use crate::db::pagination::{paginated, paginated_multicolumn, Paginator}; use crate::db::queries::ALLOW_FULL_TABLE_SCAN_SQL; -use crate::db::TransactionError; use anyhow::Context; use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; @@ -280,6 +279,11 @@ impl DataStore { // We'd do that if we had an interface for doing that with bound // parameters, etc. See oxidecomputer/omicron#973. let pool = self.pool_connection_authorized(opctx).await?; + + // The risk of a serialization error is possible here, but low, + // as most of the operations should be insertions rather than in-place + // modifications of existing tables. 
+ #[allow(clippy::disallowed_methods)] pool.transaction_async(|conn| async move { // Insert records (and generate ids) for any baseboards that do not // already exist in the database. These rows are not scoped to a @@ -1242,6 +1246,7 @@ impl DataStore { // collection if we crash while deleting it. let conn = self.pool_connection_authorized(opctx).await?; let db_collection_id = to_db_typed_uuid(collection_id); + let ( ncollections, nsps, @@ -1258,22 +1263,22 @@ impl DataStore { nzpools, nerrors, nclickhouse_keeper_membership, - ) = conn - .transaction_async(|conn| async move { - // Remove the record describing the collection itself. - let ncollections = { - use db::schema::inv_collection::dsl; - diesel::delete( - dsl::inv_collection - .filter(dsl::id.eq(db_collection_id)), - ) - .execute_async(&conn) - .await? - }; + ) = + self.transaction_retry_wrapper("inventory_delete_collection") + .transaction(&conn, |conn| async move { + // Remove the record describing the collection itself. + let ncollections = { + use db::schema::inv_collection::dsl; + diesel::delete( + dsl::inv_collection + .filter(dsl::id.eq(db_collection_id)), + ) + .execute_async(&conn) + .await? + }; - // Remove rows for service processors. - let nsps = - { + // Remove rows for service processors. + let nsps = { use db::schema::inv_service_processor::dsl; diesel::delete(dsl::inv_service_processor.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1282,9 +1287,8 @@ impl DataStore { .await? }; - // Remove rows for roots of trust. - let nrots = - { + // Remove rows for roots of trust. + let nrots = { use db::schema::inv_root_of_trust::dsl; diesel::delete(dsl::inv_root_of_trust.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1293,9 +1297,8 @@ impl DataStore { .await? }; - // Remove rows for cabooses found. - let ncabooses = - { + // Remove rows for cabooses found. + let ncabooses = { use db::schema::inv_caboose::dsl; diesel::delete(dsl::inv_caboose.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1304,9 +1307,8 @@ impl DataStore { .await? }; - // Remove rows for root of trust pages found. - let nrot_pages = - { + // Remove rows for root of trust pages found. + let nrot_pages = { use db::schema::inv_root_of_trust_page::dsl; diesel::delete(dsl::inv_root_of_trust_page.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1315,9 +1317,8 @@ impl DataStore { .await? }; - // Remove rows for sled agents found. - let nsled_agents = - { + // Remove rows for sled agents found. + let nsled_agents = { use db::schema::inv_sled_agent::dsl; diesel::delete(dsl::inv_sled_agent.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1326,9 +1327,8 @@ impl DataStore { .await? }; - // Remove rows for datasets - let ndatasets = - { + // Remove rows for datasets + let ndatasets = { use db::schema::inv_dataset::dsl; diesel::delete(dsl::inv_dataset.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1337,9 +1337,8 @@ impl DataStore { .await? }; - // Remove rows for physical disks found. - let nphysical_disks = - { + // Remove rows for physical disks found. + let nphysical_disks = { use db::schema::inv_physical_disk::dsl; diesel::delete(dsl::inv_physical_disk.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1348,9 +1347,8 @@ impl DataStore { .await? }; - // Remove rows for NVMe physical disk firmware found. - let nnvme_disk_firwmare = - { + // Remove rows for NVMe physical disk firmware found. 
+ let nnvme_disk_firwmare = { use db::schema::inv_nvme_disk_firmware::dsl; diesel::delete(dsl::inv_nvme_disk_firmware.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1359,9 +1357,8 @@ impl DataStore { .await? }; - // Remove rows associated with Omicron zones - let nsled_agent_zones = - { + // Remove rows associated with Omicron zones + let nsled_agent_zones = { use db::schema::inv_sled_omicron_zones::dsl; diesel::delete(dsl::inv_sled_omicron_zones.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1370,8 +1367,7 @@ impl DataStore { .await? }; - let nzones = - { + let nzones = { use db::schema::inv_omicron_zone::dsl; diesel::delete(dsl::inv_omicron_zone.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1380,8 +1376,7 @@ impl DataStore { .await? }; - let nnics = - { + let nnics = { use db::schema::inv_omicron_zone_nic::dsl; diesel::delete(dsl::inv_omicron_zone_nic.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1390,8 +1385,7 @@ impl DataStore { .await? }; - let nzpools = - { + let nzpools = { use db::schema::inv_zpool::dsl; diesel::delete(dsl::inv_zpool.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1400,9 +1394,8 @@ impl DataStore { .await? }; - // Remove rows for errors encountered. - let nerrors = - { + // Remove rows for errors encountered. + let nerrors = { use db::schema::inv_collection_error::dsl; diesel::delete(dsl::inv_collection_error.filter( dsl::inv_collection_id.eq(db_collection_id), @@ -1411,43 +1404,40 @@ impl DataStore { .await? }; - // Remove rows for clickhouse keeper membership - let nclickhouse_keeper_membership = { - use db::schema::inv_clickhouse_keeper_membership::dsl; - diesel::delete( - dsl::inv_clickhouse_keeper_membership.filter( - dsl::inv_collection_id.eq(db_collection_id), - ), - ) - .execute_async(&conn) - .await? - }; - - Ok(( - ncollections, - nsps, - nrots, - ncabooses, - nrot_pages, - nsled_agents, - ndatasets, - nphysical_disks, - nnvme_disk_firwmare, - nsled_agent_zones, - nzones, - nnics, - nzpools, - nerrors, - nclickhouse_keeper_membership, - )) - }) - .await - .map_err(|error| match error { - TransactionError::CustomError(e) => e, - TransactionError::Database(e) => { - public_error_from_diesel(e, ErrorHandler::Server) - } - })?; + // Remove rows for clickhouse keeper membership + let nclickhouse_keeper_membership = { + use db::schema::inv_clickhouse_keeper_membership::dsl; + diesel::delete( + dsl::inv_clickhouse_keeper_membership.filter( + dsl::inv_collection_id.eq(db_collection_id), + ), + ) + .execute_async(&conn) + .await? + }; + + Ok(( + ncollections, + nsps, + nrots, + ncabooses, + nrot_pages, + nsled_agents, + ndatasets, + nphysical_disks, + nnvme_disk_firwmare, + nsled_agent_zones, + nzones, + nnics, + nzpools, + nerrors, + nclickhouse_keeper_membership, + )) + }) + .await + .map_err(|error| { + public_error_from_diesel(error, ErrorHandler::Server) + })?; info!(&opctx.log, "removed inventory collection"; "collection_id" => collection_id.to_string(), @@ -2429,6 +2419,9 @@ impl DataStoreInventoryTest for DataStore { .pool_connection_for_tests() .await .context("getting connection")?; + + // This transaction is used by tests, and does not need to retry. + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL) .await @@ -2484,6 +2477,8 @@ mod test { impl CollectionCounts { async fn new(conn: &DataStoreConnection) -> anyhow::Result { + // This transaction is used by tests, and does not need to retry. 
+ #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL) .await @@ -2933,6 +2928,8 @@ mod test { .expect("failed to delete collection"); assert!(datastore.inventory_collections().await.unwrap().is_empty()); + // This transaction is used by tests, and does not need to retry. + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); let count = schema::inv_collection::dsl::inv_collection @@ -3055,6 +3052,8 @@ mod test { bail!("Tables missing from information_schema query"); } + // This transaction is used by tests, and does not need to retry. + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { // We need this to call "COUNT(*)" below. conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL) diff --git a/nexus/db-queries/src/db/datastore/ip_pool.rs b/nexus/db-queries/src/db/datastore/ip_pool.rs index 9548003ee5..2409839eb4 100644 --- a/nexus/db-queries/src/db/datastore/ip_pool.rs +++ b/nexus/db-queries/src/db/datastore/ip_pool.rs @@ -30,7 +30,7 @@ use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::queries::ip_pool::FilterOverlappingIpRanges; use crate::db::TransactionError; -use async_bb8_diesel::AsyncConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -722,67 +722,90 @@ impl DataStore { } type TxnError = TransactionError; - conn.transaction_async(|conn| async move { - // note this is matching the specified silo, but could be any pool - let existing_default_for_silo = dsl::ip_pool_resource - .filter(dsl::resource_type.eq(IpPoolResourceType::Silo)) - .filter(dsl::resource_id.eq(silo_id)) - .filter(dsl::is_default.eq(true)) - .select(IpPoolResource::as_select()) - .get_result_async(&conn) - .await; - - // if there is an existing default, we need to unset it before we can - // set the new default - if let Ok(existing_default) = existing_default_for_silo { - // if the pool we're making default is already default for this - // silo, don't error: just noop - if existing_default.ip_pool_id == ip_pool_id { - return Ok(existing_default); - } + let err = OptionalError::new(); + + self.transaction_retry_wrapper("ip_pool_set_default") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + // note this is matching the specified silo, but could be any pool + let existing_default_for_silo = dsl::ip_pool_resource + .filter(dsl::resource_type.eq(IpPoolResourceType::Silo)) + .filter(dsl::resource_id.eq(silo_id)) + .filter(dsl::is_default.eq(true)) + .select(IpPoolResource::as_select()) + .get_result_async(&conn) + .await; + + // if there is an existing default, we need to unset it before we can + // set the new default + if let Ok(existing_default) = existing_default_for_silo { + // if the pool we're making default is already default for this + // silo, don't error: just noop + if existing_default.ip_pool_id == ip_pool_id { + return Ok(existing_default); + } + + let unset_default = + diesel::update(dsl::ip_pool_resource) + .filter( + dsl::resource_id + .eq(existing_default.resource_id), + ) + .filter( + dsl::ip_pool_id + .eq(existing_default.ip_pool_id), + ) + .filter( + dsl::resource_type + .eq(existing_default.resource_type), + ) + .set(dsl::is_default.eq(false)) + .execute_async(&conn) + .await; + if let Err(e) = unset_default { + return Err(err.bail(TxnError::CustomError( + 
IpPoolResourceUpdateError::FailedToUnsetDefault( + e, + ), + ))); + } + } - let unset_default = diesel::update(dsl::ip_pool_resource) - .filter(dsl::resource_id.eq(existing_default.resource_id)) - .filter(dsl::ip_pool_id.eq(existing_default.ip_pool_id)) - .filter( - dsl::resource_type.eq(existing_default.resource_type), + let updated_link = diesel::update(dsl::ip_pool_resource) + .filter(dsl::resource_id.eq(silo_id)) + .filter(dsl::ip_pool_id.eq(ip_pool_id)) + .filter(dsl::resource_type.eq(IpPoolResourceType::Silo)) + .set(dsl::is_default.eq(true)) + .returning(IpPoolResource::as_returning()) + .get_result_async(&conn) + .await?; + Ok(updated_link) + } + }) + .await + .map_err(|e| match err.take() { + Some(TxnError::CustomError( + IpPoolResourceUpdateError::FailedToUnsetDefault(err), + )) => public_error_from_diesel(err, ErrorHandler::Server), + Some(TxnError::Database(err)) => { + public_error_from_diesel(err, ErrorHandler::Server) + } + None => { + public_error_from_diesel( + e, + ErrorHandler::NotFoundByLookup( + ResourceType::IpPoolResource, + // TODO: would be nice to put the actual names and/or ids in + // here but LookupType on each of the two silos doesn't have + // a nice to_string yet or a way of composing them + LookupType::ByCompositeId( + "(pool, silo)".to_string(), + ), + ), ) - .set(dsl::is_default.eq(false)) - .execute_async(&conn) - .await; - if let Err(e) = unset_default { - return Err(TxnError::CustomError( - IpPoolResourceUpdateError::FailedToUnsetDefault(e), - )); } - } - - let updated_link = diesel::update(dsl::ip_pool_resource) - .filter(dsl::resource_id.eq(silo_id)) - .filter(dsl::ip_pool_id.eq(ip_pool_id)) - .filter(dsl::resource_type.eq(IpPoolResourceType::Silo)) - .set(dsl::is_default.eq(true)) - .returning(IpPoolResource::as_returning()) - .get_result_async(&conn) - .await?; - Ok(updated_link) - }) - .await - .map_err(|e| match e { - TransactionError::CustomError( - IpPoolResourceUpdateError::FailedToUnsetDefault(e), - ) => public_error_from_diesel(e, ErrorHandler::Server), - TransactionError::Database(e) => public_error_from_diesel( - e, - ErrorHandler::NotFoundByLookup( - ResourceType::IpPoolResource, - // TODO: would be nice to put the actual names and/or ids in - // here but LookupType on each of the two silos doesn't have - // a nice to_string yet or a way of composing them - LookupType::ByCompositeId("(pool, silo)".to_string()), - ), - ), - }) + }) } /// Ephemeral and snat IPs are associated with a silo through an instance, diff --git a/nexus/db-queries/src/db/datastore/rack.rs b/nexus/db-queries/src/db/datastore/rack.rs index 74b3440a7d..dc3175c22d 100644 --- a/nexus/db-queries/src/db/datastore/rack.rs +++ b/nexus/db-queries/src/db/datastore/rack.rs @@ -674,8 +674,9 @@ impl DataStore { let log = opctx.log.clone(); let err = Arc::new(OnceLock::new()); - // NOTE: This transaction cannot yet be made retryable, as it uses - // nested transactions. + // This method uses nested transactions, which are not supported + // with retryable transactions. + #[allow(clippy::disallowed_methods)] let rack = self .pool_connection_authorized(opctx) .await? 
diff --git a/nexus/db-queries/src/db/datastore/region_replacement.rs b/nexus/db-queries/src/db/datastore/region_replacement.rs index de047d6d0c..0fda6b46ba 100644 --- a/nexus/db-queries/src/db/datastore/region_replacement.rs +++ b/nexus/db-queries/src/db/datastore/region_replacement.rs @@ -21,7 +21,7 @@ use crate::db::pagination::Paginator; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use crate::db::TransactionError; -use async_bb8_diesel::AsyncConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use omicron_common::api::external::Error; @@ -52,21 +52,28 @@ impl DataStore { opctx: &OpContext, request: RegionReplacement, ) -> Result<(), Error> { - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { - use db::schema::region_replacement::dsl; + let conn = self.pool_connection_authorized(opctx).await?; - Self::volume_repair_insert_query(request.volume_id, request.id) - .execute_async(&conn) - .await?; + self.transaction_retry_wrapper("insert_region_replacement_request") + .transaction(&conn, |conn| { + let request = request.clone(); + async move { + use db::schema::region_replacement::dsl; - diesel::insert_into(dsl::region_replacement) - .values(request) + Self::volume_repair_insert_query( + request.volume_id, + request.id, + ) .execute_async(&conn) .await?; - Ok(()) + diesel::insert_into(dsl::region_replacement) + .values(request) + .execute_async(&conn) + .await?; + + Ok(()) + } }) .await .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) @@ -666,60 +673,62 @@ impl DataStore { ) -> Result<(), Error> { type TxnError = TransactionError; - self.pool_connection_authorized(opctx) - .await? 
- .transaction_async(|conn| async move { - Self::volume_repair_delete_query( - request.volume_id, - request.id, - ) - .execute_async(&conn) - .await?; - - use db::schema::region_replacement::dsl; - - let result = diesel::update(dsl::region_replacement) - .filter(dsl::id.eq(request.id)) - .filter( - dsl::replacement_state.eq(RegionReplacementState::Completing), + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper("set_region_replacement_complete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + Self::volume_repair_delete_query( + request.volume_id, + request.id, ) - .filter(dsl::operating_saga_id.eq(operating_saga_id)) - .set(( - dsl::replacement_state.eq(RegionReplacementState::Complete), - dsl::operating_saga_id.eq(Option::::None), - )) - .check_if_exists::(request.id) - .execute_and_check(&conn) + .execute_async(&conn) .await?; - match result.status { - UpdateStatus::Updated => Ok(()), - UpdateStatus::NotUpdatedButExists => { - let record = result.found; - - if record.operating_saga_id == None - && record.replacement_state - == RegionReplacementState::Complete - { - Ok(()) - } else { - Err(TxnError::CustomError(Error::conflict(format!( - "region replacement {} set to {:?} (operating saga id {:?})", - request.id, - record.replacement_state, - record.operating_saga_id, - )))) + use db::schema::region_replacement::dsl; + + let result = diesel::update(dsl::region_replacement) + .filter(dsl::id.eq(request.id)) + .filter( + dsl::replacement_state.eq(RegionReplacementState::Completing), + ) + .filter(dsl::operating_saga_id.eq(operating_saga_id)) + .set(( + dsl::replacement_state.eq(RegionReplacementState::Complete), + dsl::operating_saga_id.eq(Option::::None), + )) + .check_if_exists::(request.id) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.operating_saga_id == None + && record.replacement_state + == RegionReplacementState::Complete + { + Ok(()) + } else { + Err(err.bail(TxnError::from(Error::conflict(format!( + "region replacement {} set to {:?} (operating saga id {:?})", + request.id, + record.replacement_state, + record.operating_saga_id, + ))))) + } } } } }) .await - .map_err(|e| match e { - TxnError::CustomError(error) => error, - - TxnError::Database(error) => { - public_error_from_diesel(error, ErrorHandler::Server) - } + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), }) } @@ -738,57 +747,59 @@ impl DataStore { RegionReplacementState::Requested, ); - self.pool_connection_authorized(opctx) - .await? 
- .transaction_async(|conn| async move { - Self::volume_repair_delete_query( - request.volume_id, - request.id, - ) - .execute_async(&conn) - .await?; - - use db::schema::region_replacement::dsl; - - let result = diesel::update(dsl::region_replacement) - .filter(dsl::id.eq(request.id)) - .filter( - dsl::replacement_state.eq(RegionReplacementState::Requested), + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper("set_region_replacement_complete_from_requested") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + Self::volume_repair_delete_query( + request.volume_id, + request.id, ) - .filter(dsl::operating_saga_id.is_null()) - .set(( - dsl::replacement_state.eq(RegionReplacementState::Complete), - )) - .check_if_exists::(request.id) - .execute_and_check(&conn) + .execute_async(&conn) .await?; - match result.status { - UpdateStatus::Updated => Ok(()), - - UpdateStatus::NotUpdatedButExists => { - let record = result.found; - - if record.replacement_state == RegionReplacementState::Complete { - Ok(()) - } else { - Err(TxnError::CustomError(Error::conflict(format!( - "region replacement {} set to {:?} (operating saga id {:?})", - request.id, - record.replacement_state, - record.operating_saga_id, - )))) + use db::schema::region_replacement::dsl; + + let result = diesel::update(dsl::region_replacement) + .filter(dsl::id.eq(request.id)) + .filter( + dsl::replacement_state.eq(RegionReplacementState::Requested), + ) + .filter(dsl::operating_saga_id.is_null()) + .set(( + dsl::replacement_state.eq(RegionReplacementState::Complete), + )) + .check_if_exists::(request.id) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.replacement_state == RegionReplacementState::Complete { + Ok(()) + } else { + Err(err.bail(TxnError::from(Error::conflict(format!( + "region replacement {} set to {:?} (operating saga id {:?})", + request.id, + record.replacement_state, + record.operating_saga_id, + ))))) + } } } } }) .await - .map_err(|e| match e { - TxnError::CustomError(error) => error, - - TxnError::Database(error) => { - public_error_from_diesel(error, ErrorHandler::Server) - } + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), }) } diff --git a/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs b/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs index b970bb8962..76a83cca2a 100644 --- a/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs +++ b/nexus/db-queries/src/db/datastore/region_snapshot_replacement.rs @@ -11,7 +11,6 @@ use crate::db; use crate::db::datastore::SQL_BATCH_SIZE; use crate::db::error::public_error_from_diesel; use crate::db::error::ErrorHandler; -use crate::db::lookup::LookupPath; use crate::db::model::RegionSnapshot; use crate::db::model::RegionSnapshotReplacement; use crate::db::model::RegionSnapshotReplacementState; @@ -23,7 +22,7 @@ use crate::db::pagination::Paginator; use crate::db::update_and_check::UpdateAndCheck; use crate::db::update_and_check::UpdateStatus; use crate::db::TransactionError; -use async_bb8_diesel::AsyncConnection; +use crate::transaction_retry::OptionalError; use async_bb8_diesel::AsyncRunQueryDsl; use diesel::prelude::*; use omicron_common::api::external::Error; @@ -64,10 +63,19 @@ impl DataStore { opctx: &OpContext, request: 
RegionSnapshotReplacement, ) -> Result<(), Error> { - let (.., db_snapshot) = LookupPath::new(opctx, &self) - .snapshot_id(request.old_snapshot_id) - .fetch() - .await?; + // Note: if `LookupPath` is used here, it will not be able to retrieve + // deleted snapshots + let db_snapshot = match self + .snapshot_get(opctx, request.old_snapshot_id) + .await? + { + Some(db_snapshot) => db_snapshot, + None => { + return Err(Error::internal_error( + "cannot perform region snapshot replacement without snapshot volume" + )); + } + }; self.insert_region_snapshot_replacement_request_with_volume_id( opctx, @@ -85,9 +93,14 @@ impl DataStore { request: RegionSnapshotReplacement, volume_id: Uuid, ) -> Result<(), Error> { - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper( + "insert_region_snapshot_replacement_request_with_volume_id", + ) + .transaction(&conn, |conn| { + let request = request.clone(); + async move { use db::schema::region_snapshot_replacement::dsl; use db::schema::volume_repair::dsl as volume_repair_dsl; @@ -108,9 +121,10 @@ impl DataStore { .await?; Ok(()) - }) - .await - .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + }) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } pub async fn get_region_snapshot_replacement_request_by_id( @@ -555,67 +569,69 @@ impl DataStore { ) -> Result<(), Error> { type TxnError = TransactionError; - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { - use db::schema::volume_repair::dsl as volume_repair_dsl; - - diesel::delete( - volume_repair_dsl::volume_repair.filter( - volume_repair_dsl::repair_id - .eq(region_snapshot_replacement_id), - ), - ) - .execute_async(&conn) - .await?; - - use db::schema::region_snapshot_replacement::dsl; + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; - let result = diesel::update(dsl::region_snapshot_replacement) - .filter(dsl::id.eq(region_snapshot_replacement_id)) - .filter( - dsl::replacement_state - .eq(RegionSnapshotReplacementState::Running), + self.transaction_retry_wrapper("set_region_snapshot_replacement_complete") + .transaction(&conn, |conn| { + let err = err.clone(); + async move { + use db::schema::volume_repair::dsl as volume_repair_dsl; + + diesel::delete( + volume_repair_dsl::volume_repair.filter( + volume_repair_dsl::repair_id + .eq(region_snapshot_replacement_id), + ), ) - .filter(dsl::operating_saga_id.is_null()) - .set((dsl::replacement_state - .eq(RegionSnapshotReplacementState::Complete),)) - .check_if_exists::( - region_snapshot_replacement_id, - ) - .execute_and_check(&conn) + .execute_async(&conn) .await?; - match result.status { - UpdateStatus::Updated => Ok(()), - UpdateStatus::NotUpdatedButExists => { - let record = result.found; + use db::schema::region_snapshot_replacement::dsl; - if record.replacement_state - == RegionSnapshotReplacementState::Complete - { - Ok(()) - } else { - Err(TxnError::CustomError(Error::conflict( - format!( - "region snapshot replacement {} set to {:?} \ - (operating saga id {:?})", - region_snapshot_replacement_id, - record.replacement_state, - record.operating_saga_id, - ), - ))) + let result = diesel::update(dsl::region_snapshot_replacement) + .filter(dsl::id.eq(region_snapshot_replacement_id)) + .filter( + dsl::replacement_state + .eq(RegionSnapshotReplacementState::Running), + ) + 
.filter(dsl::operating_saga_id.is_null()) + .set((dsl::replacement_state + .eq(RegionSnapshotReplacementState::Complete),)) + .check_if_exists::( + region_snapshot_replacement_id, + ) + .execute_and_check(&conn) + .await?; + + match result.status { + UpdateStatus::Updated => Ok(()), + UpdateStatus::NotUpdatedButExists => { + let record = result.found; + + if record.replacement_state + == RegionSnapshotReplacementState::Complete + { + Ok(()) + } else { + Err(err.bail(TxnError::from(Error::conflict( + format!( + "region snapshot replacement {} set to {:?} \ + (operating saga id {:?})", + region_snapshot_replacement_id, + record.replacement_state, + record.operating_saga_id, + ), + )))) + } } } } }) .await - .map_err(|e| match e { - TxnError::CustomError(error) => error, - - TxnError::Database(error) => { - public_error_from_diesel(error, ErrorHandler::Server) - } + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), }) } @@ -885,9 +901,15 @@ impl DataStore { ) -> Result<(), Error> { type TxnError = TransactionError; - self.pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { + let err = OptionalError::new(); + let conn = self.pool_connection_authorized(opctx).await?; + + self.transaction_retry_wrapper( + "set_region_snapshot_replacement_step_complete", + ) + .transaction(&conn, |conn| { + let err = err.clone(); + async move { use db::schema::volume_repair::dsl as volume_repair_dsl; diesel::delete( @@ -935,27 +957,25 @@ impl DataStore { { Ok(()) } else { - Err(TxnError::CustomError(Error::conflict( + Err(err.bail(TxnError::from(Error::conflict( format!( "region snapshot replacement step {} set \ - to {:?} (operating saga id {:?})", + to {:?} (operating saga id {:?})", region_snapshot_replacement_step_id, record.replacement_state, record.operating_saga_id, ), - ))) + )))) } } } - }) - .await - .map_err(|e| match e { - TxnError::CustomError(error) => error, - - TxnError::Database(error) => { - public_error_from_diesel(error, ErrorHandler::Server) - } - }) + } + }) + .await + .map_err(|e| match err.take() { + Some(err) => err.into(), + None => public_error_from_diesel(e, ErrorHandler::Server), + }) } /// Count all in-progress region snapshot replacement steps for a particular diff --git a/nexus/db-queries/src/db/datastore/role.rs b/nexus/db-queries/src/db/datastore/role.rs index b91597ad1d..ed8ec6fcd9 100644 --- a/nexus/db-queries/src/db/datastore/role.rs +++ b/nexus/db-queries/src/db/datastore/role.rs @@ -209,6 +209,11 @@ impl DataStore { // We might instead want to first-class the idea of Policies in the // database so that we can build up a whole new Policy in batches and // then flip the resource over to using it. + + // This method should probably be retryable, but this is slightly + // complicated by the cloning semantics of the queries, which + // must be Clone to be retried. + #[allow(clippy::disallowed_methods)] self.pool_connection_authorized(opctx) .await? .transaction_async(|conn| async move { diff --git a/nexus/db-queries/src/db/datastore/saga.rs b/nexus/db-queries/src/db/datastore/saga.rs index 4bc212e997..87d94e2377 100644 --- a/nexus/db-queries/src/db/datastore/saga.rs +++ b/nexus/db-queries/src/db/datastore/saga.rs @@ -654,6 +654,7 @@ mod test { .expect("failed to re-assign sagas"); // Fetch all the sagas and check their states. 
+ #[allow(clippy::disallowed_methods)] let all_sagas: Vec<_> = datastore .pool_connection_for_tests() .await diff --git a/nexus/db-queries/src/db/datastore/silo.rs b/nexus/db-queries/src/db/datastore/silo.rs index 2b7afa3270..b862f3c461 100644 --- a/nexus/db-queries/src/db/datastore/silo.rs +++ b/nexus/db-queries/src/db/datastore/silo.rs @@ -67,10 +67,11 @@ impl DataStore { use db::schema::silo::dsl; use db::schema::silo_quotas::dsl as quotas_dsl; + let conn = self.pool_connection_authorized(opctx).await?; + let count = self - .pool_connection_authorized(opctx) - .await? - .transaction_async(|conn| async move { + .transaction_retry_wrapper("load_builtin_silos") + .transaction(&conn, |conn| async move { diesel::insert_into(quotas_dsl::silo_quotas) .values(SiloQuotas::arbitrarily_high_default( DEFAULT_SILO.id(), @@ -78,19 +79,17 @@ impl DataStore { .on_conflict(quotas_dsl::silo_id) .do_nothing() .execute_async(&conn) - .await - .map_err(TransactionError::CustomError) - .unwrap(); - diesel::insert_into(dsl::silo) + .await?; + let count = diesel::insert_into(dsl::silo) .values([&*DEFAULT_SILO, &*INTERNAL_SILO]) .on_conflict(dsl::id) .do_nothing() .execute_async(&conn) - .await - .map_err(TransactionError::CustomError) + .await?; + Ok(count) }) .await - .unwrap(); + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server))?; info!(opctx.log, "created {} built-in silos", count); @@ -226,6 +225,9 @@ impl DataStore { None }; + // This method uses nested transactions, which are not supported + // with retryable transactions. + #[allow(clippy::disallowed_methods)] let silo = conn .transaction_async(|conn| async move { let silo = silo_create_query @@ -424,6 +426,10 @@ impl DataStore { let now = Utc::now(); type TxnError = TransactionError; + + // This method uses nested transactions, which are not supported + // with retryable transactions. + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { let updated_rows = diesel::update(silo::dsl::silo) .filter(silo::dsl::time_deleted.is_null()) diff --git a/nexus/db-queries/src/db/datastore/silo_group.rs b/nexus/db-queries/src/db/datastore/silo_group.rs index b8ef759116..e6168f4e42 100644 --- a/nexus/db-queries/src/db/datastore/silo_group.rs +++ b/nexus/db-queries/src/db/datastore/silo_group.rs @@ -199,6 +199,8 @@ impl DataStore { let group_id = authz_silo_group.id(); + // Prefer to use "transaction_retry_wrapper" + #[allow(clippy::disallowed_methods)] self.pool_connection_authorized(opctx) .await? .transaction_async(|conn| async move { diff --git a/nexus/db-queries/src/db/datastore/silo_user.rs b/nexus/db-queries/src/db/datastore/silo_user.rs index 2825e2a310..40f6b3f0be 100644 --- a/nexus/db-queries/src/db/datastore/silo_user.rs +++ b/nexus/db-queries/src/db/datastore/silo_user.rs @@ -21,7 +21,6 @@ use crate::db::model::UserBuiltin; use crate::db::model::UserProvisionType; use crate::db::pagination::paginated; use crate::db::update_and_check::UpdateAndCheck; -use async_bb8_diesel::AsyncConnection; use async_bb8_diesel::AsyncRunQueryDsl; use chrono::Utc; use diesel::prelude::*; @@ -92,9 +91,10 @@ impl DataStore { // TODO-robustness We might consider the RFD 192 "rcgen" pattern as well // so that people can't, say, login while we do this. let authz_silo_user_id = authz_silo_user.id(); - self.pool_connection_authorized(opctx) - .await? 
- .transaction_async(|mut conn| async move { + + let conn = self.pool_connection_authorized(opctx).await?; + self.transaction_retry_wrapper("silo_user_delete") + .transaction(&conn, |conn| async move { // Delete the user record. { use db::schema::silo_user::dsl; @@ -103,7 +103,7 @@ impl DataStore { .filter(dsl::time_deleted.is_null()) .set(dsl::time_deleted.eq(Utc::now())) .check_if_exists::(authz_silo_user_id) - .execute_and_check(&mut conn) + .execute_and_check(&conn) .await?; } @@ -112,7 +112,7 @@ impl DataStore { use db::schema::console_session::dsl; diesel::delete(dsl::console_session) .filter(dsl::silo_user_id.eq(authz_silo_user_id)) - .execute_async(&mut conn) + .execute_async(&conn) .await?; } @@ -121,7 +121,7 @@ impl DataStore { use db::schema::device_access_token::dsl; diesel::delete(dsl::device_access_token) .filter(dsl::silo_user_id.eq(authz_silo_user_id)) - .execute_async(&mut conn) + .execute_async(&conn) .await?; } @@ -130,7 +130,7 @@ impl DataStore { use db::schema::silo_group_membership::dsl; diesel::delete(dsl::silo_group_membership) .filter(dsl::silo_user_id.eq(authz_silo_user_id)) - .execute_async(&mut conn) + .execute_async(&conn) .await?; } @@ -141,7 +141,7 @@ impl DataStore { .filter(dsl::silo_user_id.eq(authz_silo_user_id)) .filter(dsl::time_deleted.is_null()) .set(dsl::time_deleted.eq(Utc::now())) - .execute_async(&mut conn) + .execute_async(&conn) .await?; } diff --git a/nexus/db-queries/src/db/pagination.rs b/nexus/db-queries/src/db/pagination.rs index 01911eb802..1929632980 100644 --- a/nexus/db-queries/src/db/pagination.rs +++ b/nexus/db-queries/src/db/pagination.rs @@ -679,6 +679,7 @@ mod test { pagparams: &DataPageParams<'_, (i64, i64)>, ) -> Vec { let conn = pool.claim().await.unwrap(); + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { // I couldn't figure out how to make this work without requiring a full // table scan, and I just want the test to work so that I can get on diff --git a/nexus/external-api/output/nexus_tags.txt b/nexus/external-api/output/nexus_tags.txt index 8102ebce08..a979a9804b 100644 --- a/nexus/external-api/output/nexus_tags.txt +++ b/nexus/external-api/output/nexus_tags.txt @@ -30,6 +30,7 @@ probe_create POST /experimental/v1/probes probe_delete DELETE /experimental/v1/probes/{probe} probe_list GET /experimental/v1/probes probe_view GET /experimental/v1/probes/{probe} +timeseries_query POST /v1/timeseries/query API operations found with tag "images" OPERATION ID METHOD URL PATH diff --git a/nexus/external-api/src/lib.rs b/nexus/external-api/src/lib.rs index 1c5c7c1d2d..e2b53a7e6f 100644 --- a/nexus/external-api/src/lib.rs +++ b/nexus/external-api/src/lib.rs @@ -2567,6 +2567,26 @@ pub trait NexusExternalApi { body: TypedBody, ) -> Result, HttpError>; + // TODO: list endpoint for project-scoped schemas is blocked on + // https://github.com/oxidecomputer/omicron/issues/5942: the authz scope for + // each schema is not stored in Clickhouse yet. + + /// Run project-scoped timeseries query + /// + /// Queries are written in OxQL. Project must be specified by name or ID in + /// URL query parameter. The OxQL query will only return timeseries data + /// from the specified project. 
+ #[endpoint { + method = POST, + path = "/v1/timeseries/query", + tags = ["hidden"], + }] + async fn timeseries_query( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError>; + // Updates /// Upload TUF repository diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index 19e429dcd9..43a65ad085 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -39,6 +39,7 @@ omicron-workspace-hack.workspace = true [dev-dependencies] expectorate.workspace = true maplit.workspace = true +omicron-common = { workspace = true, features = ["testing"] } omicron-test-utils.workspace = true proptest.workspace = true test-strategy.workspace = true diff --git a/nexus/reconfigurator/planning/proptest-regressions/blueprint_editor/sled_editor/datasets.txt b/nexus/reconfigurator/planning/proptest-regressions/blueprint_editor/sled_editor/datasets.txt new file mode 100644 index 0000000000..bee50f1683 --- /dev/null +++ b/nexus/reconfigurator/planning/proptest-regressions/blueprint_editor/sled_editor/datasets.txt @@ -0,0 +1,7 @@ +# Seeds for failure cases proptest has generated in the past. It is +# automatically read and these particular cases re-run before any +# novel cases are generated. +# +# It is recommended to check this file in to source control so that +# everyone who runs the test benefits from these saved cases. +cc a3c842ed34d27e4c78fb52fd718cfcc038942eca49672c53e126a1062f5db3ac # shrinks to input = _ProptestNamefixmeArgs { values: [[Cockroach]] } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs index eb50ab19fd..394133132b 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/builder.rs @@ -4,21 +4,28 @@ //! 
Low-level facility for generating Blueprints +use crate::blueprint_editor::DatasetIdsBackfillFromDb; +use crate::blueprint_editor::EditedSled; +use crate::blueprint_editor::SledEditError; +use crate::blueprint_editor::SledEditor; use crate::ip_allocator::IpAllocator; use crate::planner::rng::PlannerRng; use crate::planner::zone_needs_expungement; use crate::planner::ZoneExpungeReason; use anyhow::anyhow; +use anyhow::bail; +use anyhow::Context as _; use clickhouse_admin_types::OXIMETER_CLUSTER; -use datasets_editor::BlueprintDatasetsEditError; use ipnet::IpAdd; use nexus_inventory::now_db_precision; use nexus_sled_agent_shared::inventory::OmicronZoneDataset; use nexus_sled_agent_shared::inventory::ZoneKind; use nexus_types::deployment::blueprint_zone_type; use nexus_types::deployment::Blueprint; +use nexus_types::deployment::BlueprintDatasetsConfig; use nexus_types::deployment::BlueprintPhysicalDiskConfig; use nexus_types::deployment::BlueprintPhysicalDiskDisposition; +use nexus_types::deployment::BlueprintPhysicalDisksConfig; use nexus_types::deployment::BlueprintZoneConfig; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; @@ -33,6 +40,7 @@ use nexus_types::deployment::OmicronZoneExternalSnatIp; use nexus_types::deployment::PlanningInput; use nexus_types::deployment::SledDetails; use nexus_types::deployment::SledFilter; +use nexus_types::deployment::SledLookupErrorKind; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolFilter; use nexus_types::deployment::ZpoolName; @@ -62,16 +70,16 @@ use slog::error; use slog::info; use slog::o; use slog::Logger; +use std::collections::btree_map::Entry; use std::collections::BTreeMap; use std::collections::BTreeSet; use std::collections::HashSet; use std::fmt; -use std::mem; +use std::iter; use std::net::IpAddr; use std::net::Ipv6Addr; use std::net::SocketAddr; use std::net::SocketAddrV6; -use storage_editor::BlueprintStorageEditor; use thiserror::Error; use super::clickhouse::ClickhouseAllocator; @@ -80,13 +88,6 @@ use super::external_networking::BuilderExternalNetworking; use super::external_networking::ExternalNetworkingChoice; use super::external_networking::ExternalSnatNetworkingChoice; use super::internal_dns::DnsSubnetAllocator; -use super::zones::is_already_expunged; -use super::zones::BuilderZoneState; -use super::zones::BuilderZonesConfig; - -mod datasets_editor; -mod disks_editor; -mod storage_editor; /// Errors encountered while assembling blueprints #[derive(Debug, Error)] @@ -125,8 +126,12 @@ pub enum Error { TooManyDnsServers, #[error("planner produced too many {kind:?} zones")] TooManyZones { kind: ZoneKind }, - #[error(transparent)] - BlueprintDatasetsEditError(#[from] BlueprintDatasetsEditError), + #[error("error editing sled {sled_id}")] + SledEditError { + sled_id: SledUuid, + #[source] + err: SledEditError, + }, } /// Describes the result of an idempotent "ensure" operation @@ -197,12 +202,12 @@ impl EditCounts { *self != Self::zeroes() } - pub fn accum(self, other: Self) -> Self { + pub fn difference_since(self, other: Self) -> Self { Self { - added: self.added + other.added, - updated: self.updated + other.updated, - expunged: self.expunged + other.expunged, - removed: self.removed + other.removed, + added: self.added - other.added, + updated: self.updated - other.updated, + expunged: self.expunged - other.expunged, + removed: self.removed - other.removed, } } } @@ -223,11 +228,18 @@ pub struct SledEditCounts { } impl SledEditCounts { - fn 
accum(self, other: Self) -> Self { + fn has_nonzero_counts(&self) -> bool { + let Self { disks, datasets, zones } = self; + disks.has_nonzero_counts() + || datasets.has_nonzero_counts() + || zones.has_nonzero_counts() + } + + fn difference_since(self, other: Self) -> Self { Self { - disks: self.disks.accum(other.disks), - datasets: self.datasets.accum(other.datasets), - zones: self.zones.accum(other.zones), + disks: self.disks.difference_since(other.disks), + datasets: self.datasets.difference_since(other.datasets), + zones: self.zones.difference_since(other.zones), } } } @@ -349,9 +361,7 @@ pub struct BlueprintBuilder<'a> { // These fields will become part of the final blueprint. See the // corresponding fields in `Blueprint`. - pub(super) zones: BlueprintZonesBuilder<'a>, - storage: BlueprintStorageEditor, - sled_state: BTreeMap, + sled_editors: BTreeMap, cockroachdb_setting_preserve_downgrade: CockroachDbPreserveDowngrade, creator: String, @@ -400,6 +410,28 @@ impl<'a> BlueprintBuilder<'a> { (sled_id, config) }) .collect::>(); + let blueprint_disks = blueprint_zones + .keys() + .copied() + .map(|sled_id| { + let config = BlueprintPhysicalDisksConfig { + generation: Generation::new(), + disks: Vec::new(), + }; + (sled_id, config) + }) + .collect(); + let blueprint_datasets = blueprint_zones + .keys() + .copied() + .map(|sled_id| { + let config = BlueprintDatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::new(), + }; + (sled_id, config) + }) + .collect(); let num_sleds = blueprint_zones.len(); let sled_state = blueprint_zones .keys() @@ -410,8 +442,8 @@ impl<'a> BlueprintBuilder<'a> { Blueprint { id: rng.next_blueprint(), blueprint_zones, - blueprint_disks: BTreeMap::new(), - blueprint_datasets: BTreeMap::new(), + blueprint_disks, + blueprint_datasets, sled_state, parent_blueprint_id: None, internal_dns_version: Generation::new(), @@ -440,30 +472,105 @@ impl<'a> BlueprintBuilder<'a> { "parent_id" => parent_blueprint.id.to_string(), )); - // Prefer the sled state from our parent blueprint for sleds - // that were in it; there may be new sleds in `input`, in which - // case we'll use their current state as our starting point. - let mut sled_state = parent_blueprint.sled_state.clone(); - let mut commissioned_sled_ids = BTreeSet::new(); - for (sled_id, details) in input.all_sleds(SledFilter::Commissioned) { - commissioned_sled_ids.insert(sled_id); - sled_state.entry(sled_id).or_insert(details.state); + // Helper to build a `PreexistingDatasetIds` for a given sled. This will + // go away with https://github.com/oxidecomputer/omicron/issues/6645. + let build_preexisting_dataset_ids = + |sled_id| -> anyhow::Result { + match input.sled_lookup(SledFilter::All, sled_id) { + Ok(details) => { + DatasetIdsBackfillFromDb::build(&details.resources) + .with_context(|| { + format!( + "failed building map of preexisting \ + dataset IDs for sled {sled_id}" + ) + }) + } + Err(err) => match err.kind() { + SledLookupErrorKind::Missing => { + Ok(DatasetIdsBackfillFromDb::empty()) + } + SledLookupErrorKind::Filtered { .. } => unreachable!( + "SledFilter::All should not filter anything out" + ), + }, + } + }; + + // Squish the disparate maps in our parent blueprint into one map of + // `SledEditor`s. + let mut sled_editors = BTreeMap::new(); + for (sled_id, zones) in &parent_blueprint.blueprint_zones { + // Prefer the sled state from our parent blueprint for sleds + // that were in it. 
+ let state = match parent_blueprint.sled_state.get(sled_id).copied() + { + Some(state) => state, + None => { + // If we have zones but no state for a sled, we assume + // it was removed by an earlier version of the planner + // (which pruned decommissioned sleds from + // `sled_state`). Check that all of its zones are + // expunged, which is a prerequisite for + // decommissioning. If any zones aren't, then we don't + // know what to do: the state is missing but we can't + // assume "decommissioned", so fail. + if zones.are_all_zones_expunged() { + SledState::Decommissioned + } else { + bail!( + "sled {sled_id} is missing in parent blueprint \ + sled_state map, but has non-expunged zones" + ); + } + } + }; + + // If we don't have disks/datasets entries, we'll start with an + // empty config and rely on `sled_ensure_{disks,datasets}` calls to + // populate it. It's also possible our parent blueprint removed + // entries because our sled has been expunged, in which case we + // won't do any further editing and what we fill in here is + // irrelevant. + let disks = parent_blueprint + .blueprint_disks + .get(sled_id) + .cloned() + .unwrap_or_else(|| BlueprintPhysicalDisksConfig { + generation: Generation::new(), + disks: Vec::new(), + }); + let datasets = parent_blueprint + .blueprint_datasets + .get(sled_id) + .cloned() + .unwrap_or_else(|| BlueprintDatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::new(), + }); + let editor = SledEditor::new( + state, + zones.clone(), + disks, + datasets.clone(), + build_preexisting_dataset_ids(*sled_id)?, + ) + .with_context(|| { + format!("failed to construct SledEditor for sled {sled_id}") + })?; + sled_editors.insert(*sled_id, editor); } - // Make a garbage collection pass through `sled_state`. We want to keep - // any sleds which either: - // - // 1. do not have a desired state of `Decommissioned` - // 2. do have a desired state of `Decommissioned` and are still included - // in our input's list of commissioned sleds - // - // Sleds that don't fall into either of these cases have reached the - // actual `Decommissioned` state, which means we no longer need to carry - // forward that desired state. - sled_state.retain(|sled_id, state| { - *state != SledState::Decommissioned - || commissioned_sled_ids.contains(sled_id) - }); + // Add new, empty `SledEditor`s for any commissioned sleds in our input + // that weren't in the parent blueprint. (These are newly-added sleds.) 
+ for sled_id in input.all_sled_ids(SledFilter::Commissioned) { + if let Entry::Vacant(slot) = sled_editors.entry(sled_id) { + slot.insert(SledEditor::new_empty( + SledState::Active, + build_preexisting_dataset_ids(sled_id)?, + )); + } + } Ok(BlueprintBuilder { log, @@ -473,12 +580,7 @@ impl<'a> BlueprintBuilder<'a> { sled_ip_allocators: BTreeMap::new(), external_networking: OnceCell::new(), internal_dns_subnets: OnceCell::new(), - zones: BlueprintZonesBuilder::new(parent_blueprint), - storage: BlueprintStorageEditor::new( - parent_blueprint.blueprint_disks.clone(), - parent_blueprint.blueprint_datasets.clone(), - ), - sled_state, + sled_editors, cockroachdb_setting_preserve_downgrade: parent_blueprint .cockroachdb_setting_preserve_downgrade, creator: creator.to_owned(), @@ -514,12 +616,12 @@ impl<'a> BlueprintBuilder<'a> { )?; BuilderExternalNetworking::new( - self.zones - .current_zones(BlueprintZoneFilter::ShouldBeRunning) - .flat_map(|(_sled_id, zone_config)| zone_config), - self.zones - .current_zones(BlueprintZoneFilter::Expunged) - .flat_map(|(_sled_id, zone_config)| zone_config), + self.sled_editors.values().flat_map(|editor| { + editor.zones(BlueprintZoneFilter::ShouldBeRunning) + }), + self.sled_editors.values().flat_map(|editor| { + editor.zones(BlueprintZoneFilter::Expunged) + }), self.input.service_ip_pool_ranges(), ) }) @@ -534,9 +636,9 @@ impl<'a> BlueprintBuilder<'a> { ) -> Result<&mut DnsSubnetAllocator, Error> { self.internal_dns_subnets.get_or_try_init(|| { DnsSubnetAllocator::new( - self.zones - .current_zones(BlueprintZoneFilter::ShouldBeRunning) - .flat_map(|(_sled_id, zone_config)| zone_config), + self.sled_editors.values().flat_map(|editor| { + editor.zones(BlueprintZoneFilter::ShouldBeRunning) + }), self.input, ) })?; @@ -546,8 +648,8 @@ impl<'a> BlueprintBuilder<'a> { /// Iterates over the list of sled IDs for which we have zones. /// /// This may include decommissioned sleds. - pub fn sled_ids_with_zones(&self) -> impl Iterator { - self.zones.sled_ids_with_zones() + pub fn sled_ids_with_zones(&self) -> impl Iterator + '_ { + self.sled_editors.keys().copied() } pub fn current_sled_zones( @@ -555,20 +657,82 @@ impl<'a> BlueprintBuilder<'a> { sled_id: SledUuid, filter: BlueprintZoneFilter, ) -> impl Iterator { - self.zones.current_sled_zones(sled_id, filter).map(|(config, _)| config) + let Some(editor) = self.sled_editors.get(&sled_id) else { + return Box::new(iter::empty()) + as Box>; + }; + Box::new(editor.zones(filter)) } /// Assemble a final [`Blueprint`] based on the contents of the builder pub fn build(mut self) -> Blueprint { + let blueprint_id = self.rng.next_blueprint(); + // Collect the Omicron zones config for all sleds, including sleds that // are no longer in service and need expungement work. 
- let blueprint_zones = self - .zones - .into_zones_map(self.input.all_sled_ids(SledFilter::Commissioned)); - let (blueprint_disks, blueprint_datasets) = - self.storage.into_blueprint_maps( - self.input.all_sled_ids(SledFilter::InService), - ); + let mut sled_state = BTreeMap::new(); + let mut blueprint_zones = BTreeMap::new(); + let mut blueprint_disks = BTreeMap::new(); + let mut blueprint_datasets = BTreeMap::new(); + for (sled_id, editor) in self.sled_editors { + let EditedSled { zones, disks, datasets, state, edit_counts } = + editor.finalize(); + sled_state.insert(sled_id, state); + blueprint_disks.insert(sled_id, disks); + blueprint_datasets.insert(sled_id, datasets); + blueprint_zones.insert(sled_id, zones); + if edit_counts.has_nonzero_counts() { + debug!( + self.log, "sled modified in new blueprint"; + "sled_id" => %sled_id, + "blueprint_id" => %blueprint_id, + "disk_edits" => ?edit_counts.disks, + "dataset_edits" => ?edit_counts.datasets, + "zone_edits" => ?edit_counts.zones, + ); + } else { + debug!( + self.log, "sled unchanged in new blueprint"; + "sled_id" => %sled_id, + "blueprint_id" => %blueprint_id, + ); + } + } + // Preserving backwards compatibility, for now: prune sled_state of any + // fully decommissioned sleds, which we determine by the state being + // `Decommissioned` _and_ the sled is no longer in our PlanningInput's + // list of commissioned sleds. + let commissioned_sled_ids = self + .input + .all_sled_ids(SledFilter::Commissioned) + .collect::>(); + sled_state.retain(|sled_id, state| { + *state != SledState::Decommissioned + || commissioned_sled_ids.contains(sled_id) + }); + // Preserving backwards compatibility, for now: disks should only + // have entries for in-service sleds, and expunged disks should be + // removed entirely. + let in_service_sled_ids = self + .input + .all_sled_ids(SledFilter::InService) + .collect::>(); + blueprint_disks.retain(|sled_id, disks_config| { + if !in_service_sled_ids.contains(sled_id) { + return false; + } + + disks_config.disks.retain(|config| match config.disposition { + BlueprintPhysicalDiskDisposition::InService => true, + BlueprintPhysicalDiskDisposition::Expunged => false, + }); + + true + }); + // Preserving backwards compatibility, for now: datasets should only + // have entries for in-service sleds. + blueprint_datasets + .retain(|sled_id, _| in_service_sled_ids.contains(sled_id)); // If we have the clickhouse cluster setup enabled via policy and we // don't yet have a `ClickhouseClusterConfiguration`, then we must create @@ -623,11 +787,11 @@ impl<'a> BlueprintBuilder<'a> { } }); Blueprint { - id: self.rng.next_blueprint(), + id: blueprint_id, blueprint_zones, blueprint_disks, blueprint_datasets, - sled_state: self.sled_state, + sled_state, parent_blueprint_id: Some(self.parent_blueprint.id), internal_dns_version: self.input.internal_dns_version(), external_dns_version: self.input.external_dns_version(), @@ -655,8 +819,14 @@ impl<'a> BlueprintBuilder<'a> { &mut self, sled_id: SledUuid, desired_state: SledState, - ) { - self.sled_state.insert(sled_id, desired_state); + ) -> Result<(), Error> { + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to set sled state for unknown sled {sled_id}" + )) + })?; + editor.set_state(desired_state); + Ok(()) } /// Within tests, set an RNG for deterministic results. 
@@ -698,12 +868,16 @@ impl<'a> BlueprintBuilder<'a> { "sled_id" => sled_id.to_string(), )); + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to expunge zones for unknown sled {sled_id}" + )) + })?; + // Do any zones need to be marked expunged? let mut zones_to_expunge = BTreeMap::new(); - let sled_zones = - self.zones.current_sled_zones(sled_id, BlueprintZoneFilter::All); - for (zone_config, state) in sled_zones { + for zone_config in editor.zones(BlueprintZoneFilter::All) { let zone_id = zone_config.id; let log = log.new(o!( "zone_id" => zone_id.to_string() @@ -715,12 +889,13 @@ impl<'a> BlueprintBuilder<'a> { continue; }; - let is_expunged = - is_already_expunged(zone_config, state).map_err(|error| { - Error::Planner(anyhow!(error).context(format!( - "for sled {sled_id}, error computing zones to expunge" - ))) - })?; + // TODO-john we lost the check for "are we expunging a zone we + // modified in this planner iteration" - do we need that? + let is_expunged = match zone_config.disposition { + BlueprintZoneDisposition::InService + | BlueprintZoneDisposition::Quiesced => false, + BlueprintZoneDisposition::Expunged => true, + }; if !is_expunged { match reason { @@ -778,34 +953,13 @@ impl<'a> BlueprintBuilder<'a> { return Ok(zones_to_expunge); } - let sled_resources = self.sled_resources(sled_id)?; - let mut sled_storage = self.storage.sled_storage_editor( - sled_id, - sled_resources, - &mut self.rng, - )?; - // Now expunge all the zones that need it. - let removed_zones = { - let change = self.zones.change_sled_zones(sled_id); - change - .expunge_zones(zones_to_expunge.keys().cloned().collect()) - .map_err(|error| { - Error::Planner(anyhow!(error).context(format!( - "for sled {sled_id}, error expunging zones" - ))) - })? - }; - - // Also expunge the datasets of all removed zones. - for zone in removed_zones { - sled_storage.expunge_zone_datasets(zone); + for zone_id in zones_to_expunge.keys() { + editor + .expunge_zone(&zone_id) + .map_err(|err| Error::SledEditError { sled_id, err })?; } - // We're done with `sled_storage`; drop it so the borrow checker is okay - // with calling other methods on `self` below. - mem::drop(sled_storage); - // Finally, add comments describing what happened. // // Group the zones by their reason for expungement. @@ -869,12 +1023,17 @@ impl<'a> BlueprintBuilder<'a> { resources: &SledResources, ) -> Result { // These are the disks known to our (last?) 
blueprint - let mut sled_storage = self.storage.sled_storage_editor( - sled_id, - resources, - &mut self.rng, - )?; - let blueprint_disk_ids = sled_storage.disk_ids().collect::>(); + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to ensure disks for unknown sled {sled_id}" + )) + })?; + let initial_counts = editor.edit_counts(); + + let blueprint_disk_ids = editor + .disks(DiskFilter::InService) + .map(|config| config.id) + .collect::>(); // These are the in-service disks as we observed them in the database, // during the planning phase @@ -887,42 +1046,28 @@ impl<'a> BlueprintBuilder<'a> { // blueprint for (disk_id, (zpool, disk)) in database_disks { database_disk_ids.insert(disk_id); - sled_storage.ensure_disk(BlueprintPhysicalDiskConfig { - disposition: BlueprintPhysicalDiskDisposition::InService, - identity: disk.disk_identity.clone(), - id: disk_id, - pool_id: *zpool, - }); + editor.ensure_disk( + BlueprintPhysicalDiskConfig { + disposition: BlueprintPhysicalDiskDisposition::InService, + identity: disk.disk_identity.clone(), + id: disk_id, + pool_id: *zpool, + }, + &mut self.rng, + ); } // Remove any disks that appear in the blueprint, but not the database - let mut zones_to_expunge = BTreeSet::new(); for disk_id in blueprint_disk_ids { if !database_disk_ids.contains(&disk_id) { - if let Some(expunged_zpool) = sled_storage.remove_disk(&disk_id) - { - zones_to_expunge.extend( - self.zones - .zones_using_zpool( - sled_id, - BlueprintZoneFilter::ShouldBeRunning, - &expunged_zpool, - ) - .map(|zone| zone.id), - ); - } + editor + .expunge_disk(&disk_id) + .map_err(|err| Error::SledEditError { sled_id, err })?; } } - let mut edit_counts: SledEditCounts = sled_storage.finalize().into(); + let final_counts = editor.edit_counts(); - // Expunging a zpool necessarily requires also expunging any zones that - // depended on it. - for zone_id in zones_to_expunge { - edit_counts = - edit_counts.accum(self.sled_expunge_zone(sled_id, zone_id)?); - } - - Ok(edit_counts) + Ok(final_counts.difference_since(initial_counts)) } /// Ensure that a sled in the blueprint has all the datasets it needs for @@ -942,31 +1087,32 @@ impl<'a> BlueprintBuilder<'a> { pub fn sled_ensure_zone_datasets( &mut self, sled_id: SledUuid, - resources: &SledResources, ) -> Result { - let mut sled_storage = self.storage.sled_storage_editor( - sled_id, - resources, - &mut self.rng, - )?; + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to ensure zone datasets for unknown sled {sled_id}" + )) + })?; - // Ensure that datasets needed for zones exist. 
- for (zone, _zone_state) in self - .zones - .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) - { - sled_storage.ensure_zone_datasets(zone); - } + let initial_counts = editor.edit_counts(); + editor + .ensure_datasets_for_running_zones(&mut self.rng) + .map_err(|err| Error::SledEditError { sled_id, err })?; + let final_counts = editor.edit_counts(); - let StorageEditCounts { disks: disk_edits, datasets: dataset_edits } = - sled_storage.finalize(); + let SledEditCounts { disks, datasets, zones } = + final_counts.difference_since(initial_counts); debug_assert_eq!( - disk_edits, + disks, EditCounts::zeroes(), - "we only edited datasets, not disks" + "we only edited datasets" ); - - Ok(dataset_edits.into()) + debug_assert_eq!( + zones, + EditCounts::zeroes(), + "we only edited datasets" + ); + Ok(datasets.into()) } fn next_internal_dns_gz_address_index(&self, sled_id: SledUuid) -> u32 { @@ -1077,10 +1223,16 @@ impl<'a> BlueprintBuilder<'a> { sled_id: SledUuid, ) -> Result { // If there's already an NTP zone on this sled, do nothing. - let has_ntp = self - .zones - .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) - .any(|(z, _)| z.zone_type.is_ntp()); + let has_ntp = { + let editor = self.sled_editors.get(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to ensure NTP zone for unknown sled {sled_id}" + )) + })?; + editor + .zones(BlueprintZoneFilter::ShouldBeRunning) + .any(|z| z.zone_type.is_ntp()) + }; if has_ntp { return Ok(Ensure::NotNeeded); } @@ -1114,10 +1266,13 @@ impl<'a> BlueprintBuilder<'a> { let pool_name = ZpoolName::new_external(zpool_id); // If this sled already has a Crucible zone on this pool, do nothing. - let has_crucible_on_this_pool = self - .zones - .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) - .any(|(z, _)| { + let has_crucible_on_this_pool = { + let editor = self.sled_editors.get(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to ensure crucible zone for unknown sled {sled_id}" + )) + })?; + editor.zones(BlueprintZoneFilter::ShouldBeRunning).any(|z| { matches!( &z.zone_type, BlueprintZoneType::Crucible(blueprint_zone_type::Crucible { @@ -1126,7 +1281,8 @@ impl<'a> BlueprintBuilder<'a> { }) if dataset.pool_name == pool_name ) - }); + }) + }; if has_crucible_on_this_pool { return Ok(Ensure::NotNeeded); } @@ -1172,9 +1328,12 @@ impl<'a> BlueprintBuilder<'a> { sled_id: SledUuid, kind: ZoneKind, ) -> usize { - self.zones - .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) - .filter(|(z, _)| z.zone_type.kind() == kind) + let Some(editor) = self.sled_editors.get(&sled_id) else { + return 0; + }; + editor + .zones(BlueprintZoneFilter::ShouldBeRunning) + .filter(|z| z.zone_type.kind() == kind) .count() } @@ -1461,20 +1620,18 @@ impl<'a> BlueprintBuilder<'a> { dns_servers: Vec, domain: Option, ) -> Result<(), Error> { - // Check the sled id and return an appropriate error if it's invalid. - let _ = self.sled_resources(sled_id)?; - - let sled_zones = self.zones.change_sled_zones(sled_id); + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to promote NTP zone on unknown sled {sled_id}" + )) + })?; // Find the internal NTP zone and expunge it. 
- let mut internal_ntp_zone_id_iter = sled_zones - .iter_zones(BlueprintZoneFilter::ShouldBeRunning) - .filter_map(|config| { - if matches!( - config.zone().zone_type, - BlueprintZoneType::InternalNtp(_) - ) { - Some(config.zone().id) + let mut internal_ntp_zone_id_iter = editor + .zones(BlueprintZoneFilter::ShouldBeRunning) + .filter_map(|zone| { + if matches!(zone.zone_type, BlueprintZoneType::InternalNtp(_)) { + Some(zone.id) } else { None } @@ -1496,7 +1653,7 @@ impl<'a> BlueprintBuilder<'a> { std::mem::drop(internal_ntp_zone_id_iter); // Expunge the internal NTP zone. - sled_zones.expunge_zone(internal_ntp_zone_id).map_err(|error| { + editor.expunge_zone(&internal_ntp_zone_id).map_err(|error| { Error::Planner(anyhow!(error).context(format!( "error expunging internal NTP zone from sled {sled_id}" ))) @@ -1559,31 +1716,18 @@ impl<'a> BlueprintBuilder<'a> { sled_id: SledUuid, zone_id: OmicronZoneUuid, ) -> Result { - let sled_resources = self.sled_resources(sled_id)?; - - let sled_zones = self.zones.change_sled_zones(sled_id); - let (builder_config, did_expunge) = - sled_zones.expunge_zone(zone_id).map_err(|error| { - Error::Planner( - anyhow!(error) - .context("failed to expunge zone from sled {sled_id}"), - ) - })?; - let zone_config = builder_config.zone(); - - let mut storage = self.storage.sled_storage_editor( - sled_id, - sled_resources, - &mut self.rng, - )?; - storage.expunge_zone_datasets(zone_config); - - let mut edit_counts: SledEditCounts = storage.finalize().into(); - if did_expunge { - edit_counts.zones.expunged += 1; - } + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to expunge zone on unknown sled {sled_id}" + )) + })?; + let initial_counts = editor.edit_counts(); + editor + .expunge_zone(&zone_id) + .map_err(|err| Error::SledEditError { sled_id, err })?; + let final_counts = editor.edit_counts(); - Ok(edit_counts) + Ok(final_counts.difference_since(initial_counts)) } fn sled_add_zone( @@ -1591,30 +1735,25 @@ impl<'a> BlueprintBuilder<'a> { sled_id: SledUuid, zone: BlueprintZoneConfig, ) -> Result<(), Error> { - // Check the sled id and return an appropriate error if it's invalid. - let sled_resources = self.sled_resources(sled_id)?; - let mut sled_storage = self.storage.sled_storage_editor( - sled_id, - sled_resources, - &mut self.rng, - )?; - sled_storage.ensure_zone_datasets(&zone); - - let sled_zones = self.zones.change_sled_zones(sled_id); - sled_zones.add_zone(zone).map_err(|error| { - Error::Planner( - anyhow!(error) - .context(format!("error adding zone to sled {sled_id}")), - ) + let editor = self.sled_editors.get_mut(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to add zone on unknown sled {sled_id}" + )) })?; - - Ok(()) + editor + .add_zone(zone, &mut self.rng) + .map_err(|err| Error::SledEditError { sled_id, err }) } /// Returns a newly-allocated underlay address suitable for use by Omicron /// zones fn sled_alloc_ip(&mut self, sled_id: SledUuid) -> Result { let sled_subnet = self.sled_resources(sled_id)?.subnet; + let editor = self.sled_editors.get(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to allocate underlay IP for unknown sled {sled_id}" + )) + })?; let allocator = self.sled_ip_allocators.entry(sled_id).or_insert_with(|| { let sled_subnet_addr = sled_subnet.net().prefix(); @@ -1640,10 +1779,7 @@ impl<'a> BlueprintBuilder<'a> { // Record each of the sled's zones' underlay IPs as // allocated. 
- for (z, _) in self - .zones - .current_sled_zones(sled_id, BlueprintZoneFilter::All) - { + for z in editor.zones(BlueprintZoneFilter::All) { allocator.reserve(z.underlay_ip()); } @@ -1653,15 +1789,6 @@ impl<'a> BlueprintBuilder<'a> { allocator.alloc().ok_or(Error::OutOfAddresses { sled_id }) } - #[cfg(test)] - pub(crate) fn sled_select_zpool_for_tests( - &self, - sled_id: SledUuid, - zone_kind: ZoneKind, - ) -> Result { - self.sled_select_zpool(sled_id, zone_kind) - } - /// Selects a zpool for this zone type. /// /// This zpool may be used for either durable storage or transient @@ -1674,14 +1801,17 @@ impl<'a> BlueprintBuilder<'a> { sled_id: SledUuid, zone_kind: ZoneKind, ) -> Result { + let editor = self.sled_editors.get(&sled_id).ok_or_else(|| { + Error::Planner(anyhow!( + "tried to select zpool for unknown sled {sled_id}" + )) + })?; + // We'll check both the disks available to this sled per our current // blueprint and the list of all in-service zpools on this sled per our // planning input, and only pick zpools that are available in both. - let current_sled_disks = self - .storage - .current_sled_disks(&sled_id) - .ok_or(Error::NoAvailableZpool { sled_id, kind: zone_kind })? - .values() + let current_sled_disks = editor + .disks(DiskFilter::InService) .map(|disk_config| disk_config.pool_id) .collect::>(); @@ -1758,157 +1888,6 @@ impl<'a> BlueprintBuilder<'a> { } } -/// Helper for working with sets of zones on each sled -/// -/// Tracking the set of zones is slightly non-trivial because we need to bump -/// the per-sled generation number iff the zones are changed. So we need to -/// keep track of whether we've changed the zones relative to the parent -/// blueprint. We do this by keeping a copy of any [`BlueprintZonesConfig`] -/// that we've changed and a _reference_ to the parent blueprint's zones. This -/// struct makes it easy for callers iterate over the right set of zones. -pub(super) struct BlueprintZonesBuilder<'a> { - changed_zones: BTreeMap, - parent_zones: &'a BTreeMap, -} - -impl<'a> BlueprintZonesBuilder<'a> { - pub fn new(parent_blueprint: &'a Blueprint) -> BlueprintZonesBuilder { - BlueprintZonesBuilder { - changed_zones: BTreeMap::new(), - parent_zones: &parent_blueprint.blueprint_zones, - } - } - - /// Returns a mutable reference to a sled's Omicron zones *because* we're - /// going to change them. - /// - /// This updates internal data structures, and it is recommended that it be - /// called only when the caller actually wishes to make changes to zones. - /// But making no changes after calling this does not result in a changed - /// blueprint. (In particular, the generation number is only updated if - /// the state of any zones was updated.) - pub fn change_sled_zones( - &mut self, - sled_id: SledUuid, - ) -> &mut BuilderZonesConfig { - self.changed_zones.entry(sled_id).or_insert_with(|| { - if let Some(old_sled_zones) = self.parent_zones.get(&sled_id) { - BuilderZonesConfig::from_parent(old_sled_zones) - } else { - BuilderZonesConfig::new() - } - }) - } - - /// Iterates over the list of sled IDs for which we have zones. - /// - /// This may include decommissioned sleds. - pub fn sled_ids_with_zones(&self) -> impl Iterator { - let mut sled_ids = - self.changed_zones.keys().copied().collect::>(); - for &sled_id in self.parent_zones.keys() { - sled_ids.insert(sled_id); - } - sled_ids.into_iter() - } - - /// Iterates over the list of `current_sled_zones` for all sled IDs for - /// which we have zones. - /// - /// This may include decommissioned sleds. 
- pub fn current_zones( - &self, - filter: BlueprintZoneFilter, - ) -> impl Iterator)> { - let sled_ids = self.sled_ids_with_zones(); - sled_ids.map(move |sled_id| { - let zones = self - .current_sled_zones(sled_id, filter) - .map(|(zone_config, _)| zone_config) - .collect(); - (sled_id, zones) - }) - } - - /// Iterates over the list of Omicron zones currently configured for this - /// sled in the blueprint that's being built, along with each zone's state - /// in the builder. - pub fn current_sled_zones( - &self, - sled_id: SledUuid, - filter: BlueprintZoneFilter, - ) -> Box + '_> - { - if let Some(sled_zones) = self.changed_zones.get(&sled_id) { - Box::new( - sled_zones.iter_zones(filter).map(|z| (z.zone(), z.state())), - ) - } else if let Some(parent_zones) = self.parent_zones.get(&sled_id) { - Box::new(parent_zones.zones.iter().filter_map(move |z| { - if z.disposition.matches(filter) { - Some((z, BuilderZoneState::Unchanged)) - } else { - None - } - })) - } else { - Box::new(std::iter::empty()) - } - } - - /// Builds a set of all zones whose filesystem or durable dataset reside on - /// the given `zpool`. - pub fn zones_using_zpool<'b>( - &'b self, - sled_id: SledUuid, - filter: BlueprintZoneFilter, - zpool: &'b ZpoolName, - ) -> impl Iterator + 'b { - self.current_sled_zones(sled_id, filter).filter_map( - move |(config, _state)| { - if Some(zpool) == config.filesystem_pool.as_ref() - || Some(zpool) == config.zone_type.durable_zpool() - { - Some(config) - } else { - None - } - }, - ) - } - - /// Produces an owned map of zones for the sleds recorded in this blueprint - /// plus any newly-added sleds - pub fn into_zones_map( - self, - added_sled_ids: impl Iterator, - ) -> BTreeMap { - // Start with self.changed_zones, which contains entries for any - // sled whose zones config is changing in this blueprint. - let mut zones = self - .changed_zones - .into_iter() - .map(|(sled_id, zones)| (sled_id, zones.build())) - .collect::>(); - - // Carry forward any zones from our parent blueprint. This may include - // zones for decommissioned sleds. - for (sled_id, parent_zones) in self.parent_zones { - zones.entry(*sled_id).or_insert_with(|| parent_zones.clone()); - } - - // Finally, insert any newly-added sleds. - for sled_id in added_sled_ids { - zones.entry(sled_id).or_insert_with(|| BlueprintZonesConfig { - generation: Generation::new(), - zones: vec![], - }); - } - - zones - } -} - #[cfg(test)] pub mod test { use super::*; @@ -2035,9 +2014,13 @@ pub mod test { } } - // All commissioned disks should have debug and zone root datasets. + // All disks should have debug and zone root datasets. 
for (sled_id, disk_config) in &blueprint.blueprint_disks { for disk in &disk_config.disks { + eprintln!( + "checking datasets for sled {sled_id} disk {}", + disk.id + ); let zpool = ZpoolName::new_external(disk.pool_id); let datasets = datasets_for_sled(&blueprint, *sled_id); @@ -2074,10 +2057,8 @@ pub mod test { } let datasets = datasets_for_sled(&blueprint, sled_id); - let zpool = zone_config.filesystem_pool.as_ref().unwrap(); - let kind = DatasetKind::TransientZone { - name: storage_editor::zone_name(&zone_config), - }; + let (zpool, kind) = + zone_config.filesystem_dataset().unwrap().into_parts(); let dataset = find_dataset(&datasets, &zpool, kind); assert_eq!( dataset.disposition, @@ -2256,9 +2237,7 @@ pub mod test { for pool_id in new_sled_resources.zpools.keys() { builder.sled_ensure_zone_crucible(new_sled_id, *pool_id).unwrap(); } - builder - .sled_ensure_zone_datasets(new_sled_id, new_sled_resources) - .unwrap(); + builder.sled_ensure_zone_datasets(new_sled_id).unwrap(); let blueprint3 = builder.build(); verify_blueprint(&blueprint3); @@ -2381,7 +2360,7 @@ pub mod test { // Generate a new blueprint. This sled should still be included: even // though the desired state is decommissioned, the current state is // still active, so we should carry it forward. - let blueprint2 = BlueprintBuilder::new_based_on( + let mut blueprint2 = BlueprintBuilder::new_based_on( &logctx.log, &blueprint1, &input, @@ -2399,11 +2378,21 @@ pub mod test { ); // Change the input to mark the sled decommissioned. (Normally realizing - // blueprint2 would make this change.) + // blueprint2 would make this change.) We must also mark all its zones + // expunged to avoid tripping over an invalid state check in + // `new_based_on()`. let mut builder = input.into_builder(); builder.sleds_mut().get_mut(&decommision_sled_id).unwrap().state = SledState::Decommissioned; let input = builder.build(); + for z in &mut blueprint2 + .blueprint_zones + .get_mut(&decommision_sled_id) + .unwrap() + .zones + { + z.disposition = BlueprintZoneDisposition::Expunged; + } // Generate a new blueprint. This desired sled state should no longer be // present: it has reached the terminal decommissioned state, so there's @@ -2468,9 +2457,11 @@ pub mod test { // not have any disks in them. for sled_id in input.all_sled_ids(SledFilter::InService) { let disks = builder - .storage - .current_sled_disks(&sled_id) - .expect("found disks config for sled"); + .sled_editors + .get(&sled_id) + .unwrap() + .disks(DiskFilter::All) + .collect::>(); assert!( disks.is_empty(), "expected empty disks for sled {sled_id}, got {disks:?}" @@ -2505,19 +2496,14 @@ pub mod test { ); } - let new_disks = builder - .storage - .into_blueprint_maps(input.all_sled_ids(SledFilter::InService)) - .0; // We should have disks and a generation bump for every sled. let parent_disk_gens = parent .blueprint_disks .iter() .map(|(&sled_id, config)| (sled_id, config.generation)); for (sled_id, parent_gen) in parent_disk_gens { - let new_sled_disks = new_disks - .get(&sled_id) - .expect("found child entry for sled present in parent"); + let EditedSled { disks: new_sled_disks, .. } = + builder.sled_editors.remove(&sled_id).unwrap().finalize(); assert_eq!(new_sled_disks.generation, parent_gen.next()); assert_eq!( new_sled_disks.disks.len(), @@ -2577,11 +2563,8 @@ pub mod test { // Before we make any modifications, there should be no work to do. // // If we haven't changed inputs, the output should be the same! 
- for (sled_id, resources) in - input.all_sled_resources(SledFilter::Commissioned) - { - let r = - builder.sled_ensure_zone_datasets(sled_id, resources).unwrap(); + for sled_id in input.all_sled_ids(SledFilter::Commissioned) { + let r = builder.sled_ensure_zone_datasets(sled_id).unwrap(); assert_eq!(r, EnsureMultiple::NotNeeded); } @@ -2591,48 +2574,32 @@ pub mod test { .all_sled_ids(SledFilter::Commissioned) .next() .expect("at least one sled present"); - let sled_details = - input.sled_lookup(SledFilter::Commissioned, sled_id).unwrap(); - let crucible_zone_id = builder - .zones - .current_sled_zones(sled_id, BlueprintZoneFilter::ShouldBeRunning) - .find_map(|(zone_config, _)| { + let editor = + builder.sled_editors.get_mut(&sled_id).expect("found sled"); + let crucible_zone_id = editor + .zones(BlueprintZoneFilter::ShouldBeRunning) + .find_map(|zone_config| { if zone_config.zone_type.is_crucible() { return Some(zone_config.id); } None }) .expect("at least one crucible must be present"); - let change = builder.zones.change_sled_zones(sled_id); println!("Expunging crucible zone: {crucible_zone_id}"); - let expunged_zones = - change.expunge_zones(BTreeSet::from([crucible_zone_id])).unwrap(); - assert_eq!(expunged_zones.len(), 1); + + let initial_counts = editor.edit_counts(); + editor.expunge_zone(&crucible_zone_id).expect("expunged crucible"); + let changed_counts = + editor.edit_counts().difference_since(initial_counts); // In the case of Crucible, we have a durable dataset and a transient // zone filesystem, so we expect two datasets to be expunged. - let r = builder - .storage - .sled_storage_editor( - sled_id, - &sled_details.resources, - &mut builder.rng, - ) - .unwrap() - .expunge_zone_datasets(&expunged_zones[0]); assert_eq!( - r, - EnsureMultiple::Changed { - added: 0, - updated: 0, - expunged: 2, - removed: 0 - } + changed_counts.datasets, + EditCounts { added: 0, updated: 0, expunged: 2, removed: 0 } ); // Once the datasets are expunged, no further changes will be proposed. - let r = builder - .sled_ensure_zone_datasets(sled_id, &sled_details.resources) - .unwrap(); + let r = builder.sled_ensure_zone_datasets(sled_id).unwrap(); assert_eq!(r, EnsureMultiple::NotNeeded); let blueprint = builder.build(); @@ -2649,9 +2616,7 @@ pub mod test { // While the datasets still exist in the input (effectively, the db) we // cannot remove them. - let r = builder - .sled_ensure_zone_datasets(sled_id, &sled_details.resources) - .unwrap(); + let r = builder.sled_ensure_zone_datasets(sled_id).unwrap(); assert_eq!(r, EnsureMultiple::NotNeeded); let blueprint = builder.build(); @@ -2703,11 +2668,7 @@ pub mod test { // Now, we should see the datasets "removed" from the blueprint, since // we no longer need to keep around records of their expungement. 
- let sled_details = - input.sled_lookup(SledFilter::Commissioned, sled_id).unwrap(); - let r = builder - .sled_ensure_zone_datasets(sled_id, &sled_details.resources) - .unwrap(); + let r = builder.sled_ensure_zone_datasets(sled_id).unwrap(); // TODO(https://github.com/oxidecomputer/omicron/issues/6646): // Because of the workaround for #6646, we don't actually remove @@ -2950,9 +2911,7 @@ pub mod test { .sled_add_zone_cockroachdb(target_sled_id) .expect("added CRDB zone"); } - builder - .sled_ensure_zone_datasets(target_sled_id, sled_resources) - .unwrap(); + builder.sled_ensure_zone_datasets(target_sled_id).unwrap(); let blueprint = builder.build(); verify_blueprint(&blueprint); @@ -3003,4 +2962,92 @@ pub mod test { logctx.cleanup_successful(); } + + // This test can go away with + // https://github.com/oxidecomputer/omicron/issues/6645; for now, it + // confirms we maintain the compatibility layer it needs. + #[test] + fn test_backcompat_reuse_existing_database_dataset_ids() { + static TEST_NAME: &str = + "backcompat_reuse_existing_database_dataset_ids"; + let logctx = test_setup_log(TEST_NAME); + + // Start with the standard example blueprint. + let (collection, input, mut parent) = example(&logctx.log, TEST_NAME); + + // `parent` was not created prior to the addition of disks and datasets, + // so it should have datasets for all the disks and zones, and the + // dataset IDs should match the input. + let mut input_dataset_ids = BTreeMap::new(); + let mut input_ndatasets = 0; + for (_, resources) in input.all_sled_resources(SledFilter::All) { + for (zpool_id, dataset_configs) in + resources.all_datasets(ZpoolFilter::All) + { + for dataset in dataset_configs { + let id = dataset.id; + let kind = dataset.name.dataset(); + let by_kind: &mut BTreeMap<_, _> = + input_dataset_ids.entry(*zpool_id).or_default(); + let prev = by_kind.insert(kind.clone(), id); + input_ndatasets += 1; + assert!(prev.is_none()); + } + } + } + // We should have 3 datasets per disk (debug + zone root + crucible), + // plus some number of datasets for discretionary zones. We'll just + // check that we have more than 3 per disk. + assert!( + input_ndatasets + > 3 * usize::from(SledBuilder::DEFAULT_NPOOLS) + * ExampleSystemBuilder::DEFAULT_N_SLEDS, + "too few datasets: {input_ndatasets}" + ); + + // Now _remove_ the blueprint datasets entirely, to emulate a + // pre-dataset-addition blueprint. + parent.blueprint_datasets = BTreeMap::new(); + + // Build a new blueprint. + let mut builder = BlueprintBuilder::new_based_on( + &logctx.log, + &parent, + &input, + &collection, + TEST_NAME, + ) + .expect("failed to create builder"); + + // Ensure disks and datasets. This should repopulate the datasets. + for (sled_id, resources) in input.all_sled_resources(SledFilter::All) { + builder + .sled_ensure_disks(sled_id, resources) + .expect("ensured disks"); + builder + .sled_ensure_zone_datasets(sled_id) + .expect("ensured zone datasets"); + } + let output = builder.build(); + + // Repeat the logic above on our new blueprint; it should have the same + // number of datasets, and they should all have identical IDs. 
+ let mut output_dataset_ids = BTreeMap::new(); + let mut output_ndatasets = 0; + for datasets in output.blueprint_datasets.values() { + for (id, dataset) in &datasets.datasets { + let zpool_id = dataset.pool.id(); + let kind = dataset.kind.clone(); + let by_kind: &mut BTreeMap<_, _> = + output_dataset_ids.entry(zpool_id).or_default(); + let prev = by_kind.insert(kind, *id); + output_ndatasets += 1; + assert!(prev.is_none()); + } + } + assert_eq!(input_ndatasets, output_ndatasets); + assert_eq!(input_dataset_ids, output_dataset_ids); + + logctx.cleanup_successful(); + } } diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder/datasets_editor.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder/datasets_editor.rs deleted file mode 100644 index 160b841a88..0000000000 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder/datasets_editor.rs +++ /dev/null @@ -1,348 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Helper for editing the datasets of a Blueprint - -use super::EditCounts; -use crate::planner::PlannerRng; -use illumos_utils::zpool::ZpoolName; -use nexus_types::deployment::BlueprintDatasetConfig; -use nexus_types::deployment::BlueprintDatasetDisposition; -use nexus_types::deployment::BlueprintDatasetsConfig; -use nexus_types::deployment::SledResources; -use nexus_types::deployment::ZpoolFilter; -use omicron_common::api::external::ByteCount; -use omicron_common::api::external::Generation; -use omicron_common::disk::CompressionAlgorithm; -use omicron_common::disk::DatasetKind; -use omicron_common::disk::DatasetName; -use omicron_common::disk::GzipLevel; -use omicron_uuid_kinds::DatasetUuid; -use omicron_uuid_kinds::SledUuid; -use omicron_uuid_kinds::ZpoolUuid; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap; -use std::collections::BTreeSet; -use std::net::SocketAddrV6; - -#[derive(Debug, thiserror::Error)] -pub enum BlueprintDatasetsEditError { - #[error( - "{data_source} inconsistency: multiple datasets with kind {kind:?} \ - on zpool {zpool_id}: {id1}, {id2}" - )] - MultipleDatasetsOfKind { - data_source: &'static str, - zpool_id: ZpoolUuid, - kind: DatasetKind, - id1: DatasetUuid, - id2: DatasetUuid, - }, -} - -/// Helper for working with sets of datasets on each sled -/// -/// Tracking the set of datasets is slightly non-trivial because we need to -/// bump the per-sled generation number iff the datasets are changed. So -/// we need to keep track of whether we've changed the datasets relative -/// to the parent blueprint. -#[derive(Debug)] -pub(super) struct BlueprintDatasetsEditor { - current: BTreeMap, - changed: BTreeSet, -} - -impl BlueprintDatasetsEditor { - pub fn new(current: BTreeMap) -> Self { - Self { current, changed: BTreeSet::new() } - } - - /// Get a helper to edit the datasets of a specific sled. - /// - /// If any changes are made via the returned editor, the sled will be - /// recorded as needing a generation bump in its dataset config when the - /// editor is dropped. - pub fn sled_datasets_editor<'a>( - &'a mut self, - sled_id: SledUuid, - sled_resources: &SledResources, - rng: &'a mut PlannerRng, - ) -> Result, BlueprintDatasetsEditError> { - let config = self - .current - .entry(sled_id) - .or_insert_with(empty_blueprint_datasets_config); - - // Gather all dataset IDs known to the database. 
- // - // See the comment below where this is used; this is a - // backwards-compatibility layer for - // https://github.com/oxidecomputer/omicron/issues/6645. - let database_dataset_ids = build_dataset_kind_id_map( - "database", - sled_resources.all_datasets(ZpoolFilter::InService).flat_map( - |(&zpool_id, configs)| { - configs.iter().map(move |config| { - (zpool_id, config.name.dataset().clone(), config.id) - }) - }, - ), - )?; - - SledDatasetsEditor::new( - rng, - database_dataset_ids, - sled_id, - config, - &mut self.changed, - ) - } - - pub fn build( - mut self, - sled_ids: impl Iterator, - ) -> BTreeMap { - sled_ids - .map(|sled_id| { - let config = match self.current.remove(&sled_id) { - Some(mut config) => { - // Bump generation number for any sled whose - // DatasetsConfig changed - if self.changed.contains(&sled_id) { - config.generation = config.generation.next(); - } - config - } - None => empty_blueprint_datasets_config(), - }; - (sled_id, config) - }) - .collect() - } -} - -#[derive(Debug)] -pub(super) struct SledDatasetsEditor<'a> { - rng: &'a mut PlannerRng, - blueprint_dataset_ids: - BTreeMap>, - database_dataset_ids: - BTreeMap>, - config: &'a mut BlueprintDatasetsConfig, - counts: EditCounts, - sled_id: SledUuid, - parent_changed_set: &'a mut BTreeSet, -} - -impl Drop for SledDatasetsEditor<'_> { - fn drop(&mut self) { - if self.counts.has_nonzero_counts() { - self.parent_changed_set.insert(self.sled_id); - } - } -} - -impl<'a> SledDatasetsEditor<'a> { - fn new( - rng: &'a mut PlannerRng, - database_dataset_ids: BTreeMap< - ZpoolUuid, - BTreeMap, - >, - sled_id: SledUuid, - config: &'a mut BlueprintDatasetsConfig, - parent_changed_set: &'a mut BTreeSet, - ) -> Result { - let blueprint_dataset_ids = build_dataset_kind_id_map( - "parent blueprint", - config.datasets.values().map(|dataset| { - (dataset.pool.id(), dataset.kind.clone(), dataset.id) - }), - )?; - Ok(Self { - rng, - blueprint_dataset_ids, - database_dataset_ids, - config, - counts: EditCounts::zeroes(), - sled_id, - parent_changed_set, - }) - } - - pub fn expunge_datasets_if(&mut self, mut expunge_if: F) -> usize - where - F: FnMut(&BlueprintDatasetConfig) -> bool, - { - let mut num_expunged = 0; - - for dataset in self.config.datasets.values_mut() { - match dataset.disposition { - // Already expunged; ignore - BlueprintDatasetDisposition::Expunged => continue, - // Potentially expungeable - BlueprintDatasetDisposition::InService => (), - } - if expunge_if(&*dataset) { - dataset.disposition = BlueprintDatasetDisposition::Expunged; - num_expunged += 1; - self.counts.expunged += 1; - } - } - - num_expunged - } - - pub fn ensure_debug_dataset(&mut self, zpool: ZpoolName) { - const DEBUG_QUOTA_SIZE_GB: u32 = 100; - - let address = None; - let quota = Some(ByteCount::from_gibibytes_u32(DEBUG_QUOTA_SIZE_GB)); - let reservation = None; - - self.ensure_dataset( - DatasetName::new(zpool, DatasetKind::Debug), - address, - quota, - reservation, - CompressionAlgorithm::GzipN { level: GzipLevel::new::<9>() }, - ) - } - - pub fn ensure_zone_root_dataset(&mut self, zpool: ZpoolName) { - let address = None; - let quota = None; - let reservation = None; - - self.ensure_dataset( - DatasetName::new(zpool, DatasetKind::TransientZoneRoot), - address, - quota, - reservation, - CompressionAlgorithm::Off, - ) - } - - /// Ensures a dataset exists on this sled. - /// - /// - If the dataset exists in the blueprint already, use it. 
- /// - Otherwise, if the dataset exists in the database, re-use the UUID, but - /// add it to the blueprint. - /// - Otherwise, create a new dataset in the blueprint, which will propagate - /// to the database during execution. - pub fn ensure_dataset( - &mut self, - dataset: DatasetName, - address: Option, - quota: Option, - reservation: Option, - compression: CompressionAlgorithm, - ) { - let zpool_id = dataset.pool().id(); - let kind = dataset.dataset(); - - let make_config = |id: DatasetUuid| BlueprintDatasetConfig { - disposition: BlueprintDatasetDisposition::InService, - id, - pool: dataset.pool().clone(), - kind: kind.clone(), - address, - quota, - reservation, - compression, - }; - - // Is this dataset already in the blueprint? If so, update it if it's - // changed. - if let Some(existing_id) = self - .blueprint_dataset_ids - .get(&zpool_id) - .and_then(|kind_to_id| kind_to_id.get(kind)) - { - // We built `self.blueprint_dataset_ids` based on the contents of - // `self.config.datasets`, so we know we can unwrap this `get_mut`. - let old_config = self.config.datasets.get_mut(existing_id).expect( - "internal inconsistency: \ - entry in blueprint_dataset_ids but not current", - ); - let new_config = make_config(*existing_id); - - if new_config != *old_config { - *old_config = new_config; - self.counts.updated += 1; - } - - return; - } - - // Is there a dataset ID matching this one in the database? If so, use - // that. - // - // TODO(https://github.com/oxidecomputer/omicron/issues/6645): We - // could avoid reading from the datastore if we were confident all - // provisioned datasets existed in the parent blueprint. - let id = self - .database_dataset_ids - .get(&zpool_id) - .and_then(|kind_to_id| kind_to_id.get(kind)) - .copied() - .unwrap_or_else(|| self.rng.next_dataset()); - - self.config.datasets.insert(id, make_config(id)); - self.counts.added += 1; - - // We updated our config, so also record this ID in our "present in - // the blueprint" map. We know the entry doesn't exist or we would have - // found it when we checked above. - self.blueprint_dataset_ids - .entry(zpool_id) - .or_default() - .insert(kind.clone(), id); - } - - /// Consume this editor, returning a summary of changes made. 
- pub fn finalize(self) -> EditCounts { - self.counts - } -} - -fn build_dataset_kind_id_map( - data_source: &'static str, - iter: impl Iterator, -) -> Result< - BTreeMap>, - BlueprintDatasetsEditError, -> { - let mut kind_id_map: BTreeMap< - ZpoolUuid, - BTreeMap, - > = BTreeMap::new(); - for (zpool_id, kind, dataset_id) in iter { - let dataset_ids_by_kind = kind_id_map.entry(zpool_id).or_default(); - match dataset_ids_by_kind.entry(kind) { - Entry::Vacant(slot) => { - slot.insert(dataset_id); - } - Entry::Occupied(prev) => { - return Err( - BlueprintDatasetsEditError::MultipleDatasetsOfKind { - data_source, - zpool_id, - kind: prev.key().clone(), - id1: *prev.get(), - id2: dataset_id, - }, - ); - } - } - } - Ok(kind_id_map) -} - -fn empty_blueprint_datasets_config() -> BlueprintDatasetsConfig { - BlueprintDatasetsConfig { - generation: Generation::new(), - datasets: BTreeMap::new(), - } -} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder/disks_editor.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder/disks_editor.rs deleted file mode 100644 index 7c5c4c318f..0000000000 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder/disks_editor.rs +++ /dev/null @@ -1,195 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! Helper for editing the disks of a Blueprint - -use super::EditCounts; -use nexus_types::deployment::BlueprintPhysicalDiskConfig; -use nexus_types::deployment::BlueprintPhysicalDisksConfig; -use omicron_common::api::external::Generation; -use omicron_uuid_kinds::PhysicalDiskUuid; -use omicron_uuid_kinds::SledUuid; -use std::collections::btree_map::Entry; -use std::collections::BTreeMap; -use std::collections::BTreeSet; - -/// Helper for working with sets of disks on each sled -/// -/// Tracking the set of disks is slightly non-trivial because we need to -/// bump the per-sled generation number iff the disks are changed. So -/// we need to keep track of whether we've changed the disks relative -/// to the parent blueprint. -#[derive(Debug)] -pub(super) struct BlueprintDisksEditor { - current: BTreeMap, - changed: BTreeSet, -} - -impl BlueprintDisksEditor { - pub fn new( - current: BTreeMap, - ) -> Self { - let current = current - .into_iter() - .map(|(sled_id, config)| (sled_id, config.into())) - .collect(); - Self { current, changed: BTreeSet::new() } - } - - /// Get a helper to edit the disks of a specific sled. - /// - /// If any changes are made via the returned editor, the sled will be - /// recorded as needing a generation bump in its disk config when the editor - /// is dropped. - pub fn sled_disks_editor( - &mut self, - sled_id: SledUuid, - ) -> SledDisksEditor<'_> { - let config = - self.current.entry(sled_id).or_insert_with(DisksConfig::empty); - SledDisksEditor::new(sled_id, config, &mut self.changed) - } - - pub fn current_sled_disks( - &self, - sled_id: &SledUuid, - ) -> Option<&BTreeMap> { - let config = self.current.get(sled_id)?; - Some(&config.disks) - } - - /// Compile all edits into a new map suitable for a blueprint's - /// `blueprint_disks`, bumping the generation number for any sleds whose - /// disk config changed. - /// - /// Only sleds listed in `sled_ids` will be present in the returned map. - /// This primarily allows the caller to drop sleds that are no longer in - /// service. 
(Any new sleds will be given an empty set of disks, but - /// presumably any new sleds will have _some_ disks that will have already - /// been populated via a relevant `sled_disks_editor()` call.) - pub fn build( - mut self, - sled_ids: impl Iterator, - ) -> BTreeMap { - sled_ids - .map(|sled_id| { - let config = match self.current.remove(&sled_id) { - Some(mut config) => { - // Bump generation number for any sled whose DisksConfig - // changed - if self.changed.contains(&sled_id) { - config.generation = config.generation.next() - } - config.into() - } - None => DisksConfig::empty().into(), - }; - (sled_id, config) - }) - .collect() - } -} - -#[derive(Debug)] -pub(super) struct SledDisksEditor<'a> { - config: &'a mut DisksConfig, - counts: EditCounts, - sled_id: SledUuid, - parent_changed_set: &'a mut BTreeSet, -} - -impl Drop for SledDisksEditor<'_> { - fn drop(&mut self) { - if self.counts.has_nonzero_counts() { - self.parent_changed_set.insert(self.sled_id); - } - } -} - -impl<'a> SledDisksEditor<'a> { - fn new( - sled_id: SledUuid, - config: &'a mut DisksConfig, - parent_changed_set: &'a mut BTreeSet, - ) -> Self { - Self { - config, - counts: EditCounts::zeroes(), - sled_id, - parent_changed_set, - } - } - - pub fn disk_ids(&self) -> impl Iterator + '_ { - self.config.disks.keys().copied() - } - - pub fn ensure_disk(&mut self, disk: BlueprintPhysicalDiskConfig) { - let disk_id = disk.id; - match self.config.disks.entry(disk_id) { - Entry::Vacant(slot) => { - slot.insert(disk); - self.counts.added += 1; - } - Entry::Occupied(mut slot) => { - if *slot.get() != disk { - slot.insert(disk); - self.counts.updated += 1; - } - } - } - } - - pub fn remove_disk( - &mut self, - disk_id: &PhysicalDiskUuid, - ) -> Option { - let old = self.config.disks.remove(disk_id); - if old.is_some() { - self.counts.removed += 1; - } - old - } - - pub fn finalize(self) -> EditCounts { - self.counts - } -} - -// We want add and remove to be cheap and easy to check whether they performed -// the requested operation, so we'll internally convert from the vec of disks to -// a map of disks keyed by disk ID. -#[derive(Debug)] -struct DisksConfig { - generation: Generation, - disks: BTreeMap, -} - -impl DisksConfig { - fn empty() -> Self { - Self { generation: Generation::new(), disks: BTreeMap::new() } - } -} - -impl From for BlueprintPhysicalDisksConfig { - fn from(config: DisksConfig) -> Self { - BlueprintPhysicalDisksConfig { - generation: config.generation, - disks: config.disks.into_values().collect(), - } - } -} - -impl From for DisksConfig { - fn from(config: BlueprintPhysicalDisksConfig) -> Self { - Self { - generation: config.generation, - disks: config - .disks - .into_iter() - .map(|disk| (disk.id, disk)) - .collect(), - } - } -} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/builder/storage_editor.rs b/nexus/reconfigurator/planning/src/blueprint_builder/builder/storage_editor.rs deleted file mode 100644 index 2119656da3..0000000000 --- a/nexus/reconfigurator/planning/src/blueprint_builder/builder/storage_editor.rs +++ /dev/null @@ -1,206 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. - -//! 
Helper for editing the storage (disks and datasets) of a Blueprint - -use crate::planner::PlannerRng; - -use super::datasets_editor::BlueprintDatasetsEditError; -use super::datasets_editor::BlueprintDatasetsEditor; -use super::datasets_editor::SledDatasetsEditor; -use super::disks_editor::BlueprintDisksEditor; -use super::disks_editor::SledDisksEditor; -use super::EnsureMultiple; -use super::StorageEditCounts; -use illumos_utils::zpool::ZpoolName; -use nexus_types::deployment::blueprint_zone_type; -use nexus_types::deployment::BlueprintDatasetsConfig; -use nexus_types::deployment::BlueprintPhysicalDiskConfig; -use nexus_types::deployment::BlueprintPhysicalDisksConfig; -use nexus_types::deployment::BlueprintZoneConfig; -use nexus_types::deployment::BlueprintZoneType; -use nexus_types::deployment::SledResources; -use omicron_common::disk::CompressionAlgorithm; -use omicron_common::disk::DatasetKind; -use omicron_common::disk::DatasetName; -use omicron_uuid_kinds::PhysicalDiskUuid; -use omicron_uuid_kinds::SledUuid; -use std::collections::BTreeMap; - -#[derive(Debug)] -pub(super) struct BlueprintStorageEditor { - disks: BlueprintDisksEditor, - datasets: BlueprintDatasetsEditor, -} - -impl BlueprintStorageEditor { - pub fn new( - disks: BTreeMap, - datasets: BTreeMap, - ) -> Self { - Self { - disks: BlueprintDisksEditor::new(disks), - datasets: BlueprintDatasetsEditor::new(datasets), - } - } - - pub fn sled_storage_editor<'a>( - &'a mut self, - sled_id: SledUuid, - sled_resources: &SledResources, - rng: &'a mut PlannerRng, - ) -> Result, BlueprintDatasetsEditError> { - let disks = self.disks.sled_disks_editor(sled_id); - let datasets = - self.datasets.sled_datasets_editor(sled_id, sled_resources, rng)?; - Ok(SledStorageEditor { disks, datasets }) - } - - pub fn current_sled_disks( - &self, - sled_id: &SledUuid, - ) -> Option<&BTreeMap> { - self.disks.current_sled_disks(sled_id) - } - - pub fn into_blueprint_maps( - self, - sled_ids: impl Iterator + Clone, - ) -> ( - BTreeMap, - BTreeMap, - ) { - (self.disks.build(sled_ids.clone()), self.datasets.build(sled_ids)) - } -} - -#[derive(Debug)] -pub(super) struct SledStorageEditor<'a> { - disks: SledDisksEditor<'a>, - datasets: SledDatasetsEditor<'a>, -} - -impl SledStorageEditor<'_> { - pub fn disk_ids(&self) -> impl Iterator + '_ { - self.disks.disk_ids() - } - - pub fn ensure_disk(&mut self, disk: BlueprintPhysicalDiskConfig) { - let zpool = ZpoolName::new_external(disk.pool_id); - - self.disks.ensure_disk(disk); - self.datasets.ensure_debug_dataset(zpool.clone()); - self.datasets.ensure_zone_root_dataset(zpool); - } - - pub fn remove_disk( - &mut self, - disk_id: &PhysicalDiskUuid, - ) -> Option { - let Some(disk) = self.disks.remove_disk(disk_id) else { - return None; - }; - self.datasets - .expunge_datasets_if(|dataset| dataset.pool.id() == disk.pool_id); - Some(ZpoolName::new_external(disk.pool_id)) - } - - pub fn ensure_zone_datasets(&mut self, zone: &BlueprintZoneConfig) { - // TODO check that zpools are on valid disks? 
- - // Dataset for transient zone filesystem - if let Some(fs_zpool) = &zone.filesystem_pool { - let name = zone_name(&zone); - let address = None; - let quota = None; - let reservation = None; - self.datasets.ensure_dataset( - DatasetName::new( - fs_zpool.clone(), - DatasetKind::TransientZone { name }, - ), - address, - quota, - reservation, - CompressionAlgorithm::Off, - ); - } - - // Dataset for durable dataset co-located with zone - if let Some(dataset) = zone.zone_type.durable_dataset() { - let zpool = &dataset.dataset.pool_name; - - if let Some(fs_zpool) = &zone.filesystem_pool { - debug_assert_eq!( - zpool, fs_zpool, - "zone has durable dataset and transient root \ - on different zpools" - ); - } - - let address = match zone.zone_type { - BlueprintZoneType::Crucible( - blueprint_zone_type::Crucible { address, .. }, - ) => Some(address), - _ => None, - }; - let quota = None; - let reservation = None; - self.datasets.ensure_dataset( - DatasetName::new(zpool.clone(), dataset.kind), - address, - quota, - reservation, - CompressionAlgorithm::Off, - ); - } - } - - pub fn expunge_zone_datasets( - &mut self, - zone: &BlueprintZoneConfig, - ) -> EnsureMultiple { - let mut expunged = 0; - - if zone.filesystem_pool.is_some() { - let name = zone_name(&zone); - let kind = DatasetKind::TransientZone { name }; - expunged += self.datasets.expunge_datasets_if(|dataset_config| { - dataset_config.kind == kind - }); - } - - if let Some(dataset) = zone.zone_type.durable_dataset() { - expunged += self.datasets.expunge_datasets_if(|dataset_config| { - dataset_config.pool == dataset.dataset.pool_name - && dataset_config.kind == dataset.kind - }); - } - - if expunged == 0 { - EnsureMultiple::NotNeeded - } else { - EnsureMultiple::Changed { - added: 0, - updated: 0, - expunged, - removed: 0, - } - } - } - - pub fn finalize(self) -> StorageEditCounts { - StorageEditCounts { - disks: self.disks.finalize(), - datasets: self.datasets.finalize(), - } - } -} - -pub(super) fn zone_name(zone: &BlueprintZoneConfig) -> String { - illumos_utils::zone::zone_name( - zone.zone_type.kind().zone_prefix(), - Some(zone.id), - ) -} diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs index 725835f4ae..bab6476456 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder/mod.rs @@ -8,7 +8,6 @@ mod builder; mod clickhouse; mod external_networking; mod internal_dns; -mod zones; pub use builder::*; pub use clickhouse::{ClickhouseAllocator, ClickhouseZonesThatShouldBeRunning}; diff --git a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs b/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs deleted file mode 100644 index 672331ab81..0000000000 --- a/nexus/reconfigurator/planning/src/blueprint_builder/zones.rs +++ /dev/null @@ -1,527 +0,0 @@ -// This Source Code Form is subject to the terms of the Mozilla Public -// License, v. 2.0. If a copy of the MPL was not distributed with this -// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
- -use std::collections::BTreeSet; - -use nexus_types::deployment::{ - BlueprintZoneConfig, BlueprintZoneDisposition, BlueprintZoneFilter, - BlueprintZonesConfig, -}; -use omicron_common::api::external::Generation; -use omicron_uuid_kinds::OmicronZoneUuid; -use thiserror::Error; - -#[derive(Debug)] -#[must_use] -pub(super) struct BuilderZonesConfig { - // The current generation -- this is bumped at blueprint build time and is - // otherwise not exposed to callers. - generation: Generation, - - // The list of zones, along with their state. - zones: Vec, -} - -impl BuilderZonesConfig { - pub(super) fn new() -> Self { - Self { - // Note that the first generation is reserved to mean the one - // containing no zones. See - // OmicronZonesConfig::INITIAL_GENERATION. - // - // Since we're currently assuming that creating a new - // `BuilderZonesConfig` means that we're going to add new zones - // shortly, we start with Generation::new() here. It'll get - // bumped up to the next one in `Self::build`. - generation: Generation::new(), - zones: vec![], - } - } - - pub(super) fn from_parent(parent: &BlueprintZonesConfig) -> Self { - Self { - // We'll bump this up at build time. - generation: parent.generation, - - zones: parent - .zones - .iter() - .map(|zone| BuilderZoneConfig { - zone: zone.clone(), - state: BuilderZoneState::Unchanged, - }) - .collect(), - } - } - - pub(super) fn add_zone( - &mut self, - zone: BlueprintZoneConfig, - ) -> Result<(), BuilderZonesConfigError> { - if self.zones.iter().any(|z| z.zone.id == zone.id) { - // We shouldn't be trying to add zones that already exist -- - // something went wrong in the planner logic. - return Err(BuilderZonesConfigError::AddExistingZone { - zone_id: zone.id, - }); - }; - - self.zones - .push(BuilderZoneConfig { zone, state: BuilderZoneState::Added }); - Ok(()) - } - - // On success, returns the now-expunged zone and whether or not it was set - // to expunged (as opposed to already being marked expunged). - pub(super) fn expunge_zone( - &mut self, - zone_id: OmicronZoneUuid, - ) -> Result<(&BuilderZoneConfig, bool), BuilderZonesConfigError> { - let zone = self - .zones - .iter_mut() - .find(|zone| zone.zone.id == zone_id) - .ok_or_else(|| { - let mut unmatched = BTreeSet::new(); - unmatched.insert(zone_id); - BuilderZonesConfigError::ExpungeUnmatchedZones { unmatched } - })?; - - // Check that the zone is expungeable. Typically, zones passed - // in here should have had this check done to them already, but - // in case they're not, or in case something else about those - // zones changed in between, check again. - let needs_expunged = !is_already_expunged(&zone.zone, zone.state)?; - - if needs_expunged { - zone.zone.disposition = BlueprintZoneDisposition::Expunged; - zone.state = BuilderZoneState::Modified; - } - - Ok((&*zone, needs_expunged)) - } - - pub(super) fn expunge_zones( - &mut self, - mut zones: BTreeSet, - ) -> Result, BuilderZonesConfigError> { - let mut removed = Vec::new(); - - for zone in &mut self.zones { - if zones.remove(&zone.zone.id) { - // Check that the zone is expungeable. Typically, zones passed - // in here should have had this check done to them already, but - // in case they're not, or in case something else about those - // zones changed in between, check again. 
- is_already_expunged(&zone.zone, zone.state)?; - zone.zone.disposition = BlueprintZoneDisposition::Expunged; - zone.state = BuilderZoneState::Modified; - removed.push(&zone.zone); - } - } - - // All zones passed in should have been found -- are there any left - // over? - if !zones.is_empty() { - return Err(BuilderZonesConfigError::ExpungeUnmatchedZones { - unmatched: zones, - }); - } - - Ok(removed) - } - - pub(super) fn iter_zones( - &self, - filter: BlueprintZoneFilter, - ) -> impl Iterator { - self.zones.iter().filter(move |z| z.zone().disposition.matches(filter)) - } - - pub(super) fn build(self) -> BlueprintZonesConfig { - // Only bump the generation if any zones have been changed. - let generation = if self - .zones - .iter() - .any(|z| z.state != BuilderZoneState::Unchanged) - { - self.generation.next() - } else { - self.generation - }; - - let mut ret = BlueprintZonesConfig { - generation, - zones: self.zones.into_iter().map(|z| z.zone).collect(), - }; - ret.sort(); - ret - } -} - -pub(super) fn is_already_expunged( - zone: &BlueprintZoneConfig, - state: BuilderZoneState, -) -> Result { - match zone.disposition { - BlueprintZoneDisposition::InService - | BlueprintZoneDisposition::Quiesced => { - if state != BuilderZoneState::Unchanged { - // We shouldn't be trying to expunge zones that have also been - // changed in this blueprint -- something went wrong in the planner - // logic. - return Err(BuilderZonesConfigError::ExpungeModifiedZone { - zone_id: zone.id, - state, - }); - } - Ok(false) - } - BlueprintZoneDisposition::Expunged => { - // Treat expungement as idempotent. - Ok(true) - } - } -} - -#[derive(Debug)] -pub(super) struct BuilderZoneConfig { - zone: BlueprintZoneConfig, - state: BuilderZoneState, -} - -impl BuilderZoneConfig { - pub(super) fn zone(&self) -> &BlueprintZoneConfig { - &self.zone - } - - pub(super) fn state(&self) -> BuilderZoneState { - self.state - } -} - -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub(super) enum BuilderZoneState { - Unchanged, - Modified, - Added, -} - -#[derive(Clone, Debug, PartialEq, Eq, Error)] -pub(super) enum BuilderZonesConfigError { - #[error("attempted to add zone that already exists: {zone_id}")] - AddExistingZone { zone_id: OmicronZoneUuid }, - #[error( - "attempted to expunge zone {zone_id} that was in state {state:?} \ - (can only expunge unchanged zones)" - )] - ExpungeModifiedZone { zone_id: OmicronZoneUuid, state: BuilderZoneState }, - #[error( - "while expunging zones, not all zones provided were found: {unmatched:?}" - )] - ExpungeUnmatchedZones { unmatched: BTreeSet }, -} - -#[cfg(test)] -mod tests { - use std::{ - collections::BTreeMap, - net::{Ipv6Addr, SocketAddrV6}, - }; - - use maplit::btreeset; - use nexus_sled_agent_shared::inventory::ZoneKind; - use nexus_types::deployment::SledDisk; - use nexus_types::external_api::views::PhysicalDiskPolicy; - use nexus_types::external_api::views::PhysicalDiskState; - use nexus_types::{ - deployment::{ - blueprint_zone_type, BlueprintZoneType, SledDetails, SledFilter, - SledResources, - }, - external_api::views::{SledPolicy, SledState}, - }; - use omicron_common::address::Ipv6Subnet; - use omicron_common::disk::DiskIdentity; - use omicron_test_utils::dev::test_setup_log; - use omicron_uuid_kinds::PhysicalDiskUuid; - use omicron_uuid_kinds::ZpoolUuid; - - use crate::{ - blueprint_builder::{test::verify_blueprint, BlueprintBuilder, Ensure}, - example::{ExampleSystemBuilder, SimRngState}, - planner::rng::PlannerRng, - }; - - use super::*; - - /// A test focusing on 
`BlueprintZonesBuilder` and its internal logic. - #[test] - fn test_builder_zones() { - static TEST_NAME: &str = "blueprint_test_builder_zones"; - let logctx = test_setup_log(TEST_NAME); - - let mut rng = SimRngState::from_seed(TEST_NAME); - let (example, blueprint_initial) = ExampleSystemBuilder::new_with_rng( - &logctx.log, - rng.next_system_rng(), - ) - .build(); - - // Add a completely bare sled to the input. - let (new_sled_id, input2) = { - let mut sled_id_rng = rng.next_sled_id_rng(); - let new_sled_id = sled_id_rng.next(); - - let mut input = example.input.clone().into_builder(); - - input - .add_sled( - new_sled_id, - SledDetails { - policy: SledPolicy::provisionable(), - state: SledState::Active, - resources: SledResources { - subnet: Ipv6Subnet::new( - "fd00:1::".parse().unwrap(), - ), - zpools: BTreeMap::from([( - ZpoolUuid::new_v4(), - ( - SledDisk { - disk_identity: DiskIdentity { - vendor: String::from("fake-vendor"), - serial: String::from("fake-serial"), - model: String::from("fake-model"), - }, - disk_id: PhysicalDiskUuid::new_v4(), - policy: PhysicalDiskPolicy::InService, - state: PhysicalDiskState::Active, - }, - // Datasets: Leave empty - vec![], - ), - )]), - }, - }, - ) - .expect("adding new sled"); - - (new_sled_id, input.build()) - }; - - let existing_sled_id = example - .input - .all_sled_ids(SledFilter::Commissioned) - .next() - .expect("at least one sled present"); - - let mut builder = BlueprintBuilder::new_based_on( - &logctx.log, - &blueprint_initial, - &input2, - &example.collection, - "the_test", - ) - .expect("creating blueprint builder"); - builder.set_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))); - let new_sled_resources = &input2 - .sled_lookup(SledFilter::Commissioned, new_sled_id) - .unwrap() - .resources; - - // Test adding a new sled with an NTP zone. - builder.sled_ensure_disks(new_sled_id, new_sled_resources).unwrap(); - assert_eq!( - builder.sled_ensure_zone_ntp(new_sled_id).unwrap(), - Ensure::Added - ); - - // Iterate over the zones for the sled and ensure that the NTP zone is - // present. - { - let mut zones = builder.zones.current_sled_zones( - new_sled_id, - BlueprintZoneFilter::ShouldBeRunning, - ); - let (_, state) = zones.next().expect("exactly one zone for sled"); - assert!(zones.next().is_none(), "exactly one zone for sled"); - assert_eq!( - state, - BuilderZoneState::Added, - "NTP zone should have been added" - ); - } - - // Now, test adding a new zone (Oximeter, picked arbitrarily) to an - // existing sled. - let filesystem_pool = builder - .sled_select_zpool_for_tests(existing_sled_id, ZoneKind::Oximeter) - .expect("chose zpool for new zone"); - let change = builder.zones.change_sled_zones(existing_sled_id); - let new_zone_id = OmicronZoneUuid::new_v4(); - change - .add_zone(BlueprintZoneConfig { - disposition: BlueprintZoneDisposition::InService, - id: new_zone_id, - filesystem_pool: Some(filesystem_pool), - zone_type: BlueprintZoneType::Oximeter( - blueprint_zone_type::Oximeter { - address: SocketAddrV6::new( - Ipv6Addr::UNSPECIFIED, - 0, - 0, - 0, - ), - }, - ), - }) - .expect("adding new zone"); - - // Attempt to expunge one of the other zones on the sled. - let existing_zone_id = change - .iter_zones(BlueprintZoneFilter::ShouldBeRunning) - .find(|z| z.zone.id != new_zone_id) - .expect("at least one existing zone") - .zone - .id; - change - .expunge_zones(btreeset! 
{ existing_zone_id }) - .expect("expunging existing zone"); - // Do it again to ensure that expunging an already-expunged zone is - // idempotent, even within the same blueprint. - change - .expunge_zones(btreeset! { existing_zone_id }) - .expect("expunging already-expunged zone"); - // But expunging a zone that doesn't exist should fail. - let non_existent_zone_id = OmicronZoneUuid::new_v4(); - let non_existent_set = btreeset! { non_existent_zone_id }; - let error = change - .expunge_zones(non_existent_set.clone()) - .expect_err("expunging non-existent zone"); - assert_eq!( - error, - BuilderZonesConfigError::ExpungeUnmatchedZones { - unmatched: non_existent_set - } - ); - - { - // Iterate over the zones and ensure that the Oximeter zone is - // present, and marked added. - let mut zones = builder.zones.current_sled_zones( - existing_sled_id, - BlueprintZoneFilter::ShouldBeRunning, - ); - zones - .find_map(|(z, state)| { - if z.id == new_zone_id { - assert_eq!( - state, - BuilderZoneState::Added, - "new zone ID {new_zone_id} should be marked added" - ); - Some(()) - } else { - None - } - }) - .expect("new zone ID should be present"); - } - - // Attempt to expunge the newly added Oximeter zone. This should fail - // because we only support expunging zones that are unchanged from the - // parent blueprint. - let error = builder - .zones - .change_sled_zones(existing_sled_id) - .expunge_zones(btreeset! { new_zone_id }) - .expect_err("expunging a new zone should fail"); - assert_eq!( - error, - BuilderZonesConfigError::ExpungeModifiedZone { - zone_id: new_zone_id, - state: BuilderZoneState::Added - } - ); - - // Ensure all datasets are created for the zones we've provisioned - for (sled_id, resources) in - input2.all_sled_resources(SledFilter::Commissioned) - { - builder.sled_ensure_zone_datasets(sled_id, resources).unwrap(); - } - - // Now build the blueprint and ensure that all the changes we described - // above are present. - let blueprint = builder.build(); - verify_blueprint(&blueprint); - let diff = blueprint.diff_since_blueprint(&blueprint_initial); - println!("expecting new NTP and Oximeter zones:\n{}", diff.display()); - - // No sleds were removed. - assert_eq!(diff.sleds_removed.len(), 0); - - // One sled was added. - assert_eq!(diff.sleds_added.len(), 1); - let sled_id = diff.sleds_added.first().unwrap(); - assert_eq!(*sled_id, new_sled_id); - let new_sled_zones = diff.zones.added.get(sled_id).unwrap(); - // The generation number should be newer than the initial default. - assert_eq!( - new_sled_zones.generation_after.unwrap(), - Generation::new().next() - ); - assert_eq!(new_sled_zones.zones.len(), 1); - - // TODO: AJS - See comment above - we don't actually use the control sled anymore - // so the comparison was changed. - // One sled was modified: existing_sled_id - assert_eq!(diff.sleds_modified.len(), 1, "1 sled modified"); - for sled_id in &diff.sleds_modified { - assert_eq!(*sled_id, existing_sled_id); - let added = diff.zones.added.get(sled_id).unwrap(); - assert_eq!( - added.generation_after.unwrap(), - added.generation_before.unwrap().next() - ); - assert_eq!(added.zones.len(), 1); - let added_zone = &added.zones[0]; - assert_eq!(added_zone.id(), new_zone_id); - - assert!(!diff.zones.removed.contains_key(sled_id)); - let modified = diff.zones.modified.get(sled_id).unwrap(); - assert_eq!(modified.zones.len(), 1); - let modified_zone = &modified.zones[0]; - assert_eq!(modified_zone.zone.id(), existing_zone_id); - } - - // Test a no-op change. 
- { - let mut builder = BlueprintBuilder::new_based_on( - &logctx.log, - &blueprint, - &input2, - &example.collection, - "the_test", - ) - .expect("creating blueprint builder"); - builder.set_rng(PlannerRng::from_seed((TEST_NAME, "bp2"))); - - // This call by itself shouldn't bump the generation number. - builder.zones.change_sled_zones(existing_sled_id); - - let blueprint_noop = builder.build(); - verify_blueprint(&blueprint_noop); - let diff = blueprint_noop.diff_since_blueprint(&blueprint); - println!("expecting a noop:\n{}", diff.display()); - - assert!(diff.sleds_modified.is_empty(), "no sleds modified"); - assert!(diff.sleds_added.is_empty(), "no sleds added"); - assert!(diff.sleds_removed.is_empty(), "no sleds removed"); - } - - logctx.cleanup_successful(); - } -} diff --git a/nexus/reconfigurator/planning/src/blueprint_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor.rs new file mode 100644 index 0000000000..652b541de1 --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_editor.rs @@ -0,0 +1,14 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! High-level facilities for editing Blueprints +//! +//! See crate-level documentation for details. + +mod sled_editor; + +pub(crate) use sled_editor::DatasetIdsBackfillFromDb; +pub(crate) use sled_editor::EditedSled; +pub(crate) use sled_editor::SledEditError; +pub(crate) use sled_editor::SledEditor; diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs new file mode 100644 index 0000000000..13094b97a4 --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor.rs @@ -0,0 +1,329 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Support for editing the blueprint details of a single sled. 
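For illustration of the rule the no-op test above exercises: a rebuilt zones config only bumps its generation when at least one zone was actually added, modified, or expunged, and an unchanged rebuild carries the parent generation forward. The following is a minimal, std-only sketch of that rule; the ZoneState enum and the plain u64 generation are simplified stand-ins for the crate's BuilderZoneState and Generation types, not its API.

#[derive(Clone, Copy, PartialEq, Eq, Debug)]
enum ZoneState {
    Unchanged,
    Modified,
    Added,
}

/// Bump the generation only if some zone changed; otherwise keep the
/// parent generation so consumers see no spurious update.
fn next_generation(parent: u64, zone_states: &[ZoneState]) -> u64 {
    if zone_states.iter().any(|s| *s != ZoneState::Unchanged) {
        parent + 1
    } else {
        parent
    }
}

fn main() {
    // A no-op edit keeps the parent generation...
    assert_eq!(next_generation(3, &[ZoneState::Unchanged, ZoneState::Unchanged]), 3);
    // ...while any real change bumps it exactly once.
    assert_eq!(next_generation(3, &[ZoneState::Unchanged, ZoneState::Added]), 4);
}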
+ +use crate::blueprint_builder::SledEditCounts; +use crate::planner::PlannerRng; +use illumos_utils::zpool::ZpoolName; +use nexus_types::deployment::blueprint_zone_type; +use nexus_types::deployment::BlueprintDatasetsConfig; +use nexus_types::deployment::BlueprintPhysicalDiskConfig; +use nexus_types::deployment::BlueprintPhysicalDisksConfig; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZoneType; +use nexus_types::deployment::BlueprintZonesConfig; +use nexus_types::deployment::DiskFilter; +use nexus_types::external_api::views::SledState; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::PhysicalDiskUuid; + +mod datasets; +mod disks; +mod zones; + +pub(crate) use self::datasets::DatasetIdsBackfillFromDb; + +pub use self::datasets::DatasetsEditError; +pub use self::datasets::MultipleDatasetsOfKind; +pub use self::disks::DisksEditError; +pub use self::disks::DuplicateDiskId; +pub use self::zones::DuplicateZoneId; +pub use self::zones::ZonesEditError; + +use self::datasets::DatasetsEditor; +use self::datasets::PartialDatasetConfig; +use self::disks::DisksEditor; +use self::zones::ZonesEditor; + +#[derive(Debug, thiserror::Error)] +pub enum SledInputError { + #[error(transparent)] + DuplicateZoneId(#[from] DuplicateZoneId), + #[error(transparent)] + DuplicateDiskId(#[from] DuplicateDiskId), + #[error(transparent)] + MultipleDatasetsOfKind(#[from] MultipleDatasetsOfKind), +} + +#[derive(Debug, thiserror::Error)] +pub enum SledEditError { + #[error("failed to edit disks")] + EditDisks(#[from] DisksEditError), + #[error("failed to edit datasets")] + EditDatasetsError(#[from] DatasetsEditError), + #[error("failed to edit zones")] + EditZones(#[from] ZonesEditError), + #[error( + "invalid configuration for zone {zone_id}: \ + filesystem root zpool ({fs_zpool}) and durable dataset zpool \ + ({dur_zpool}) should be the same" + )] + ZoneInvalidZpoolCombination { + zone_id: OmicronZoneUuid, + fs_zpool: ZpoolName, + dur_zpool: ZpoolName, + }, + #[error( + "invalid configuration for zone {zone_id}: \ + zpool ({zpool}) is not present in this sled's disks" + )] + ZoneOnNonexistentZpool { zone_id: OmicronZoneUuid, zpool: ZpoolName }, +} + +#[derive(Debug)] +pub(crate) struct SledEditor { + state: SledState, + zones: ZonesEditor, + disks: DisksEditor, + datasets: DatasetsEditor, +} + +#[derive(Debug)] +pub(crate) struct EditedSled { + pub state: SledState, + pub zones: BlueprintZonesConfig, + pub disks: BlueprintPhysicalDisksConfig, + pub datasets: BlueprintDatasetsConfig, + pub edit_counts: SledEditCounts, +} + +impl SledEditor { + pub fn new( + state: SledState, + zones: BlueprintZonesConfig, + disks: BlueprintPhysicalDisksConfig, + datasets: BlueprintDatasetsConfig, + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Result { + Ok(Self { + state, + zones: zones.try_into()?, + disks: disks.try_into()?, + datasets: DatasetsEditor::new(datasets, preexisting_dataset_ids)?, + }) + } + + pub fn new_empty( + state: SledState, + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Self { + Self { + state, + zones: ZonesEditor::empty(), + disks: DisksEditor::empty(), + datasets: DatasetsEditor::empty(preexisting_dataset_ids), + } + } + + pub fn finalize(self) -> EditedSled { + let (disks, disks_counts) = self.disks.finalize(); + let (datasets, datasets_counts) = self.datasets.finalize(); + let (zones, zones_counts) = self.zones.finalize(); + EditedSled { + state: self.state, + zones, + 
disks, + datasets, + edit_counts: SledEditCounts { + disks: disks_counts, + datasets: datasets_counts, + zones: zones_counts, + }, + } + } + + pub fn edit_counts(&self) -> SledEditCounts { + SledEditCounts { + disks: self.disks.edit_counts(), + datasets: self.datasets.edit_counts(), + zones: self.zones.edit_counts(), + } + } + + pub fn set_state(&mut self, new_state: SledState) { + self.state = new_state; + } + + pub fn disks( + &self, + filter: DiskFilter, + ) -> impl Iterator { + self.disks.disks(filter) + } + + pub fn zones( + &self, + filter: BlueprintZoneFilter, + ) -> impl Iterator { + self.zones.zones(filter) + } + + pub fn ensure_disk( + &mut self, + disk: BlueprintPhysicalDiskConfig, + rng: &mut PlannerRng, + ) { + let zpool = ZpoolName::new_external(disk.pool_id); + + self.disks.ensure(disk); + + // Every disk also gets a Debug and Transient Zone Root dataset; ensure + // both of those exist as well. + let debug = PartialDatasetConfig::for_debug(zpool.clone()); + let zone_root = PartialDatasetConfig::for_transient_zone_root(zpool); + + self.datasets.ensure_in_service(debug, rng); + self.datasets.ensure_in_service(zone_root, rng); + } + + pub fn expunge_disk( + &mut self, + disk_id: &PhysicalDiskUuid, + ) -> Result<(), SledEditError> { + let zpool_id = self.disks.expunge(disk_id)?; + + // When we expunge a disk, we must also expunge any datasets on it, and + // any zones that relied on those datasets. + self.datasets.expunge_all_on_zpool(&zpool_id); + self.zones.expunge_all_on_zpool(&zpool_id); + + Ok(()) + } + + pub fn add_zone( + &mut self, + zone: BlueprintZoneConfig, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + // Ensure we can construct the configs for the datasets for this zone. + let datasets = ZoneDatasetConfigs::new(&self.disks, &zone)?; + + // Actually add the zone and its datasets. + self.zones.add_zone(zone)?; + datasets.ensure_in_service(&mut self.datasets, rng); + + Ok(()) + } + + pub fn expunge_zone( + &mut self, + zone_id: &OmicronZoneUuid, + ) -> Result<(), SledEditError> { + let (did_expunge, config) = self.zones.expunge(zone_id)?; + + // If we didn't actually expunge the zone in this edit, we don't + // move on and expunge its datasets. This is to guard against + // accidentally exposing a different zone's datasets (if that zone has + // happens to have the same dataset kind as us and is running on the + // same zpool as us, which is only possible if we were previously + // expunged). + // + // This wouldn't be necessary if `config` tracked its dataset IDs + // explicitly instead of only recording its zpool; once we fix that we + // should be able to remove this check. + if !did_expunge { + return Ok(()); + } + + if let Some(dataset) = config.filesystem_dataset() { + self.datasets.expunge(&dataset.pool().id(), dataset.dataset())?; + } + if let Some(dataset) = config.zone_type.durable_dataset() { + self.datasets + .expunge(&dataset.dataset.pool_name.id(), &dataset.kind)?; + } + + Ok(()) + } + + /// Backwards compatibility / test helper: If we're given a blueprint that + /// has zones but wasn't created via `SledEditor`, it might not have + /// datasets for all its zones. This method backfills them. + pub fn ensure_datasets_for_running_zones( + &mut self, + rng: &mut PlannerRng, + ) -> Result<(), SledEditError> { + for zone in self.zones.zones(BlueprintZoneFilter::ShouldBeRunning) { + ZoneDatasetConfigs::new(&self.disks, zone)? 
+ .ensure_in_service(&mut self.datasets, rng); + } + Ok(()) + } +} + +#[derive(Debug)] +struct ZoneDatasetConfigs { + filesystem: Option, + durable: Option, +} + +impl ZoneDatasetConfigs { + fn new( + disks: &DisksEditor, + zone: &BlueprintZoneConfig, + ) -> Result { + let filesystem_dataset = zone + .filesystem_dataset() + .map(|dataset| PartialDatasetConfig::for_transient_zone(dataset)); + let durable_dataset = zone.zone_type.durable_dataset().map(|dataset| { + // `dataset` records include an optional socket address, which is + // only applicable for durable datasets backing crucible. This this + // is a little fishy and might go away with + // https://github.com/oxidecomputer/omicron/issues/6998. + let address = match &zone.zone_type { + BlueprintZoneType::Crucible( + blueprint_zone_type::Crucible { address, .. }, + ) => Some(*address), + _ => None, + }; + PartialDatasetConfig::for_durable_zone( + dataset.dataset.pool_name.clone(), + dataset.kind, + address, + ) + }); + + // Ensure that if this zone has both kinds of datasets, they reside on + // the same zpool. + if let (Some(fs), Some(dur)) = (&filesystem_dataset, &durable_dataset) { + if fs.zpool() != dur.zpool() { + return Err(SledEditError::ZoneInvalidZpoolCombination { + zone_id: zone.id, + fs_zpool: fs.zpool().clone(), + dur_zpool: dur.zpool().clone(), + }); + } + } + + // Ensure that if we have a zpool, we have a matching disk (i.e., a zone + // can't be added if it has a dataset on a zpool that we don't have) + if let Some(dataset) = + filesystem_dataset.as_ref().or(durable_dataset.as_ref()) + { + if !disks.contains_zpool(&dataset.zpool().id()) { + return Err(SledEditError::ZoneOnNonexistentZpool { + zone_id: zone.id, + zpool: dataset.zpool().clone(), + }); + } + } + + Ok(Self { filesystem: filesystem_dataset, durable: durable_dataset }) + } + + fn ensure_in_service( + self, + datasets: &mut DatasetsEditor, + rng: &mut PlannerRng, + ) { + if let Some(dataset) = self.filesystem { + datasets.ensure_in_service(dataset, rng); + } + if let Some(dataset) = self.durable { + datasets.ensure_in_service(dataset, rng); + } + } +} diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs new file mode 100644 index 0000000000..de397b9caa --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/datasets.rs @@ -0,0 +1,776 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
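The checks in ZoneDatasetConfigs::new above boil down to two rules: if a zone has both a transient filesystem dataset and a durable dataset, they must name the same zpool, and whichever zpool is named must be backed by a disk the sled actually has. Below is a condensed, std-only sketch of that validation, using simplified stand-in types rather than the crate's ZpoolName and SledEditError.

use std::collections::BTreeSet;

type ZpoolId = u64;

#[derive(Debug, PartialEq)]
enum ZoneDatasetError {
    MismatchedZpools { fs: ZpoolId, durable: ZpoolId },
    NoBackingDisk { zpool: ZpoolId },
}

fn check_zone_datasets(
    fs_zpool: Option<ZpoolId>,
    durable_zpool: Option<ZpoolId>,
    sled_disks: &BTreeSet<ZpoolId>,
) -> Result<(), ZoneDatasetError> {
    // Rule 1: if both datasets exist, they must live on the same zpool.
    if let (Some(fs), Some(durable)) = (fs_zpool, durable_zpool) {
        if fs != durable {
            return Err(ZoneDatasetError::MismatchedZpools { fs, durable });
        }
    }
    // Rule 2: the zpool (if any) must correspond to one of the sled's disks.
    if let Some(zpool) = fs_zpool.or(durable_zpool) {
        if !sled_disks.contains(&zpool) {
            return Err(ZoneDatasetError::NoBackingDisk { zpool });
        }
    }
    Ok(())
}

fn main() {
    let sled_disks: BTreeSet<ZpoolId> = [1, 2].into_iter().collect();
    assert!(check_zone_datasets(Some(1), Some(1), &sled_disks).is_ok());
    assert_eq!(
        check_zone_datasets(Some(1), Some(2), &sled_disks),
        Err(ZoneDatasetError::MismatchedZpools { fs: 1, durable: 2 })
    );
    assert_eq!(
        check_zone_datasets(Some(3), None, &sled_disks),
        Err(ZoneDatasetError::NoBackingDisk { zpool: 3 })
    );
}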
+ +use crate::blueprint_builder::EditCounts; +use crate::planner::PlannerRng; +use illumos_utils::zpool::ZpoolName; +use nexus_types::deployment::BlueprintDatasetConfig; +use nexus_types::deployment::BlueprintDatasetDisposition; +use nexus_types::deployment::BlueprintDatasetFilter; +use nexus_types::deployment::BlueprintDatasetsConfig; +use nexus_types::deployment::SledResources; +use nexus_types::deployment::ZpoolFilter; +use omicron_common::api::external::ByteCount; +use omicron_common::api::external::Generation; +use omicron_common::disk::CompressionAlgorithm; +use omicron_common::disk::DatasetKind; +use omicron_common::disk::DatasetName; +use omicron_common::disk::GzipLevel; +use omicron_uuid_kinds::DatasetUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::net::SocketAddrV6; + +#[derive(Debug, thiserror::Error)] +#[error( + "invalid blueprint input: multiple datasets with kind {kind:?} \ + on zpool {zpool_id}: {id1}, {id2}" +)] +pub struct MultipleDatasetsOfKind { + zpool_id: ZpoolUuid, + kind: DatasetKind, + id1: DatasetUuid, + id2: DatasetUuid, +} + +#[derive(Debug, thiserror::Error)] +pub enum DatasetsEditError { + #[error( + "tried to expunge nonexistent dataset: \ + zpool {zpool_id}, kind {kind}" + )] + ExpungeNonexistentDataset { zpool_id: ZpoolUuid, kind: DatasetKind }, +} + +/// TODO(): In between +/// the addition of datasets to blueprints and knowing all deployed system +/// have _generated_ a blueprint that populates datasets, we are in a sticky +/// situation where a dataset might have already existed in CRDB with an ID, +/// but the blueprint system doesn't know about it. We accept a map of all +/// existing dataset IDs, and then when determining the ID of a dataset, +/// we'll try these in order: +/// +/// 1. Is the dataset in our blueprint already? If so, use its ID. +/// 2. Is the dataset in `preexisting_database_ids`? If so, use that ID. +/// 3. Generate a new random ID. +#[derive(Debug)] +pub(crate) struct DatasetIdsBackfillFromDb( + BTreeMap>, +); + +impl DatasetIdsBackfillFromDb { + pub fn build( + resources: &SledResources, + ) -> Result { + let iter = resources.all_datasets(ZpoolFilter::InService).flat_map( + |(&zpool_id, configs)| { + configs.iter().map(move |config| { + (zpool_id, config.name.dataset().clone(), config.id) + }) + }, + ); + + let mut kind_id_map: BTreeMap< + ZpoolUuid, + BTreeMap, + > = BTreeMap::new(); + + for (zpool_id, kind, dataset_id) in iter { + let dataset_ids_by_kind = kind_id_map.entry(zpool_id).or_default(); + match dataset_ids_by_kind.entry(kind) { + Entry::Vacant(slot) => { + slot.insert(dataset_id); + } + Entry::Occupied(prev) => { + return Err(MultipleDatasetsOfKind { + zpool_id, + kind: prev.key().clone(), + id1: *prev.get(), + id2: dataset_id, + }); + } + } + } + Ok(Self(kind_id_map)) + } + + pub fn empty() -> Self { + Self(BTreeMap::new()) + } +} + +impl DatasetIdsBackfillFromDb { + fn get( + &self, + zpool_id: &ZpoolUuid, + kind: &DatasetKind, + ) -> Option { + self.0.get(zpool_id).and_then(|by_kind| by_kind.get(kind).copied()) + } +} + +/// Container for most of the information needed to construct a +/// `BlueprintDatasetConfig`. +/// +/// Omitted from this set are the disposition (in practice, this will typically +/// be "in service", as one constructs a `PartialDatasetConfig` to describe a +/// dataset that should be in service) and the ID. 
Dataset IDs are a little +/// tricky at the moment (see `DatasetIdsBackfillFromDb` above), so they're +/// determined internally by `DatasetsEditor`. +#[derive(Debug)] +pub(crate) struct PartialDatasetConfig { + pub name: DatasetName, + pub address: Option, + pub quota: Option, + pub reservation: Option, + pub compression: CompressionAlgorithm, +} + +impl PartialDatasetConfig { + pub fn zpool(&self) -> &ZpoolName { + self.name.pool() + } + + pub fn for_debug(zpool: ZpoolName) -> Self { + const DEBUG_QUOTA_SIZE_GB: u32 = 100; + + Self { + name: DatasetName::new(zpool, DatasetKind::Debug), + address: None, + quota: Some(ByteCount::from_gibibytes_u32(DEBUG_QUOTA_SIZE_GB)), + reservation: None, + compression: CompressionAlgorithm::GzipN { + level: GzipLevel::new::<9>(), + }, + } + } + + pub fn for_transient_zone_root(zpool: ZpoolName) -> Self { + Self { + name: DatasetName::new(zpool, DatasetKind::TransientZoneRoot), + address: None, + quota: None, + reservation: None, + compression: CompressionAlgorithm::Off, + } + } + + pub fn for_transient_zone(name: DatasetName) -> Self { + assert!( + matches!(name.dataset(), DatasetKind::TransientZone { .. }), + "for_transient_zone called with incorrect dataset kind: {name:?}" + ); + Self { + name, + address: None, + quota: None, + reservation: None, + compression: CompressionAlgorithm::Off, + } + } + + pub fn for_durable_zone( + zpool: ZpoolName, + kind: DatasetKind, + address: Option, + ) -> Self { + Self { + name: DatasetName::new(zpool, kind), + address, + quota: None, + reservation: None, + compression: CompressionAlgorithm::Off, + } + } +} + +#[derive(Debug)] +pub(super) struct DatasetsEditor { + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + config: BlueprintDatasetsConfig, + // Cache of _in service only_ datasets, identified by (zpool, kind). + in_service_by_zpool_and_kind: + BTreeMap>, + // Cache of _expunged_ dataset IDs. This serves as a list of IDs from + // `preexisting_dataset_ids` to ignore, as we shouldn't reuse old IDs if + // they belong to expunged datasets. We should be able to remove this when + // we remove `preexisting_dataset_ids`. 
+ expunged_datasets: BTreeSet, + counts: EditCounts, +} + +impl DatasetsEditor { + pub fn new( + config: BlueprintDatasetsConfig, + preexisting_dataset_ids: DatasetIdsBackfillFromDb, + ) -> Result { + let mut in_service_by_zpool_and_kind = BTreeMap::new(); + let mut expunged_datasets = BTreeSet::new(); + for dataset in config.datasets.values() { + match dataset.disposition { + BlueprintDatasetDisposition::InService => { + let by_kind: &mut BTreeMap<_, _> = + in_service_by_zpool_and_kind + .entry(dataset.pool.id()) + .or_default(); + match by_kind.entry(dataset.kind.clone()) { + Entry::Vacant(slot) => { + slot.insert(dataset.id); + } + Entry::Occupied(prev) => { + return Err(MultipleDatasetsOfKind { + zpool_id: dataset.pool.id(), + kind: dataset.kind.clone(), + id1: *prev.get(), + id2: dataset.id, + }); + } + } + } + BlueprintDatasetDisposition::Expunged => { + expunged_datasets.insert(dataset.id); + } + } + } + Ok(Self { + preexisting_dataset_ids, + config, + in_service_by_zpool_and_kind, + expunged_datasets, + counts: EditCounts::zeroes(), + }) + } + + pub fn empty(preexisting_dataset_ids: DatasetIdsBackfillFromDb) -> Self { + Self { + preexisting_dataset_ids, + config: BlueprintDatasetsConfig { + generation: Generation::new(), + datasets: BTreeMap::new(), + }, + in_service_by_zpool_and_kind: BTreeMap::new(), + expunged_datasets: BTreeSet::new(), + counts: EditCounts::zeroes(), + } + } + + pub fn finalize(self) -> (BlueprintDatasetsConfig, EditCounts) { + let mut config = self.config; + if self.counts.has_nonzero_counts() { + config.generation = config.generation.next(); + } + (config, self.counts) + } + + pub fn edit_counts(&self) -> EditCounts { + self.counts + } + + #[allow(dead_code)] // currently only used by tests; this will change soon + pub fn datasets( + &self, + filter: BlueprintDatasetFilter, + ) -> impl Iterator { + self.config + .datasets + .values() + .filter(move |dataset| dataset.disposition.matches(filter)) + } + + // Private method; panics if given an ID that isn't present in + // `self.config.datasets`. Callers must ensure the ID is valid. + fn expunge_by_known_valid_id(&mut self, id: DatasetUuid) { + let dataset = self + .config + .datasets + .get_mut(&id) + .expect("expunge_impl called with invalid ID"); + match dataset.disposition { + BlueprintDatasetDisposition::InService => { + dataset.disposition = BlueprintDatasetDisposition::Expunged; + self.counts.expunged += 1; + } + BlueprintDatasetDisposition::Expunged => { + // already expunged; nothing to do + } + } + self.expunged_datasets.insert(dataset.id); + } + + /// Expunge a dataset identified by its zpool + kind combo. + /// + /// TODO-cleanup This is a little fishy and should be replaced with + /// an expunge-by-ID method instead, but that requires some rework + /// (). 
+ pub fn expunge( + &mut self, + zpool: &ZpoolUuid, + kind: &DatasetKind, + ) -> Result<(), DatasetsEditError> { + let Some(id) = self + .in_service_by_zpool_and_kind + .get_mut(zpool) + .and_then(|by_kind| by_kind.remove(kind)) + else { + return Err(DatasetsEditError::ExpungeNonexistentDataset { + zpool_id: *zpool, + kind: kind.clone(), + }); + }; + self.expunge_by_known_valid_id(id); + Ok(()) + } + + pub fn expunge_all_on_zpool(&mut self, zpool: &ZpoolUuid) { + let Some(by_kind) = self.in_service_by_zpool_and_kind.remove(zpool) + else { + return; + }; + + for id in by_kind.into_values() { + self.expunge_by_known_valid_id(id); + } + } + + pub fn ensure_in_service( + &mut self, + dataset: PartialDatasetConfig, + rng: &mut PlannerRng, + ) -> &BlueprintDatasetConfig { + // Convert the partial config into a full config by finding or + // generating its ID. + let PartialDatasetConfig { + name, + address, + quota, + reservation, + compression, + } = dataset; + let (pool, kind) = name.into_parts(); + + let id = { + // If there is a dataset of the given `kind` on the given + // `zpool`, find its ID. + // + // This prefers IDs we already have; if we don't have one, it + // falls back to backfilling based on IDs recorded in the + // database from before blueprints tracked datasets (see + // `DatasetIdsBackfillFromDb` above). + if let Some(blueprint_id) = self + .in_service_by_zpool_and_kind + .get(&pool.id()) + .and_then(|by_kind| by_kind.get(&kind).copied()) + { + blueprint_id + } else if let Some(preexisting_database_id) = + self.preexisting_dataset_ids.get(&pool.id(), &kind) + { + // Only use old database IDs if this ID hasn't been expunged. + // + // This check won't work if there's a preexisting_database_id + // for an old dataset that has been both expunged _and removed_, + // as we have no way of knowing about completely removed + // datasets. However: + // + // 1. `DatasetIdsBackfillFromDb::build()` filters to only + // in-service datasets, so we should never find a database ID + // for a removed dataset. + // 2. We don't yet ever remove datasets anyway, and hopefully + // `DatasetIdsBackfillFromDb` is entirely removed by then (it + // should be removeable after R12, once we've guaranteed all + // blueprints have datasets). + if !self.expunged_datasets.contains(&preexisting_database_id) { + preexisting_database_id + } else { + rng.next_dataset() + } + } else { + rng.next_dataset() + } + }; + + let dataset = BlueprintDatasetConfig { + disposition: BlueprintDatasetDisposition::InService, + id, + pool, + kind, + address, + quota, + reservation, + compression, + }; + + // Add or update our config with this new dataset info. + match self.config.datasets.entry(dataset.id) { + Entry::Vacant(slot) => { + self.in_service_by_zpool_and_kind + .entry(dataset.pool.id()) + .or_default() + .insert(dataset.kind.clone(), dataset.id); + self.counts.added += 1; + &*slot.insert(dataset) + } + Entry::Occupied(mut prev) => { + if *prev.get() != dataset { + self.counts.updated += 1; + prev.insert(dataset); + } + &*prev.into_mut() + } + } + } +} + +#[cfg(test)] +mod tests { + use super::*; + use nexus_types::deployment::BlueprintDatasetFilter; + use omicron_uuid_kinds::GenericUuid; + use proptest::prelude::*; + use std::collections::BTreeSet; + use test_strategy::proptest; + use test_strategy::Arbitrary; + use uuid::Uuid; + + // Helper functions to "tag" an iterator (i.e., turn it into an iterator of + // tuples) for use with `build_test_config()` below. 
+ fn all_in_service( + value: I, + ) -> impl Iterator + where + I: IntoIterator, + { + value + .into_iter() + .map(|kind| (BlueprintDatasetDisposition::InService, kind)) + } + fn all_expunged( + value: I, + ) -> impl Iterator + where + I: IntoIterator, + { + value + .into_iter() + .map(|kind| (BlueprintDatasetDisposition::Expunged, kind)) + } + + fn build_test_config(values: I) -> BlueprintDatasetsConfig + where + I: Iterator, + J: Iterator, + { + let mut datasets = BTreeMap::new(); + let mut dataset_id_index = 0; + for (zpool_id_index, disposition_kinds) in values.enumerate() { + let zpool_id = ZpoolUuid::from_untyped_uuid(Uuid::from_u128( + zpool_id_index as u128, + )); + for (disposition, kind) in disposition_kinds { + let id = { + let id = DatasetUuid::from_untyped_uuid(Uuid::from_u128( + dataset_id_index, + )); + dataset_id_index += 1; + id + }; + let dataset = BlueprintDatasetConfig { + disposition, + id, + pool: ZpoolName::new_external(zpool_id), + kind, + address: None, + quota: None, + reservation: None, + compression: CompressionAlgorithm::Off, + }; + let prev = datasets.insert(id, dataset); + assert!(prev.is_none(), "no duplicate dataset IDs"); + } + } + let mut generation = Generation::new(); + if dataset_id_index > 0 { + generation = generation.next(); + } + BlueprintDatasetsConfig { generation, datasets } + } + + #[derive(Debug, Arbitrary)] + struct DatasetKindSet { + #[strategy(prop::collection::btree_set(any::(), 0..16))] + kinds: BTreeSet, + } + + #[derive(Debug, Arbitrary)] + struct ZpoolsWithInServiceDatasets { + #[strategy(prop::collection::vec(any::(), 0..10))] + by_zpool: Vec, + } + + impl ZpoolsWithInServiceDatasets { + fn into_config(self) -> BlueprintDatasetsConfig { + build_test_config( + self.by_zpool + .into_iter() + .map(|kinds| all_in_service(kinds.kinds)), + ) + } + } + + #[derive(Debug, Arbitrary)] + struct DatasetKindVec { + #[strategy(prop::collection::vec(any::(), 0..32))] + kinds: Vec, + } + + #[derive(Debug, Arbitrary)] + struct ZpoolsWithExpungedDatasets { + #[strategy(prop::collection::vec(any::(), 0..10))] + by_zpool: Vec, + } + + impl ZpoolsWithExpungedDatasets { + fn into_config(self) -> BlueprintDatasetsConfig { + build_test_config( + self.by_zpool + .into_iter() + .map(|kinds| all_expunged(kinds.kinds)), + ) + } + } + + // Proptest helper to construct zpools with both in-service datasets (the + // first element of the tuple: a set of kinds) and expunged datasets (the + // second element of the tuple: a vec of kinds). 
+ #[derive(Debug, Arbitrary)] + struct ZpoolsWithMixedDatasets { + #[strategy(prop::collection::vec(any::<(DatasetKindSet, DatasetKindVec)>(), 0..10))] + by_zpool: Vec<(DatasetKindSet, DatasetKindVec)>, + } + + impl ZpoolsWithMixedDatasets { + fn into_config(self) -> BlueprintDatasetsConfig { + build_test_config(self.by_zpool.into_iter().map( + |(in_service, expunged)| { + all_in_service(in_service.kinds) + .chain(all_expunged(expunged.kinds)) + }, + )) + } + } + + #[proptest] + fn proptest_create_editor_with_in_service_datasets( + by_zpool: ZpoolsWithInServiceDatasets, + ) { + _ = DatasetsEditor::new( + by_zpool.into_config(), + DatasetIdsBackfillFromDb::empty(), + ) + .expect("built editor"); + } + + #[proptest] + fn proptest_create_editor_with_expunged_datasets( + by_zpool: ZpoolsWithExpungedDatasets, + ) { + _ = DatasetsEditor::new( + by_zpool.into_config(), + DatasetIdsBackfillFromDb::empty(), + ) + .expect("built editor"); + } + + #[proptest] + fn proptest_add_same_kind_after_expunging( + initial: ZpoolsWithMixedDatasets, + rng_seed: u32, + ) { + let config = initial.into_config(); + let mut editor = DatasetsEditor::new( + config.clone(), + DatasetIdsBackfillFromDb::empty(), + ) + .expect("built editor"); + + let mut rng = PlannerRng::from_seed(( + rng_seed, + "proptest_add_same_kind_after_expunging", + )); + + // For each originally-in-service dataset: + // + // 1. Expunge that dataset + // 2. Add a new dataset of the same kind + // 3. Ensure the new dataset ID is freshly-generated + for dataset in config.datasets.values().filter(|dataset| { + dataset.disposition.matches(BlueprintDatasetFilter::InService) + }) { + editor + .expunge(&dataset.pool.id(), &dataset.kind) + .expect("expunged dataset"); + + let new_dataset = PartialDatasetConfig { + name: DatasetName::new( + dataset.pool.clone(), + dataset.kind.clone(), + ), + address: dataset.address, + quota: dataset.quota, + reservation: dataset.reservation, + compression: dataset.compression, + }; + let new_dataset = editor.ensure_in_service(new_dataset, &mut rng); + assert_ne!(dataset.id, new_dataset.id); + } + + // Repeat the test above, but this time assume all the dataset IDs were + // also present in the backfill database map. We should not reuse IDs + // after expunging zones. 
+ let database_backfill = { + let mut by_zpool: BTreeMap<_, BTreeMap<_, _>> = BTreeMap::new(); + for dataset in config.datasets.values().filter(|dataset| { + dataset.disposition.matches(BlueprintDatasetFilter::InService) + }) { + let prev = by_zpool + .entry(dataset.pool.id()) + .or_default() + .insert(dataset.kind.clone(), dataset.id); + assert!( + prev.is_none(), + "duplicate (pool,kind) in-service input" + ); + } + DatasetIdsBackfillFromDb(by_zpool) + }; + let mut editor = DatasetsEditor::new(config.clone(), database_backfill) + .expect("built editor"); + for dataset in config.datasets.values().filter(|dataset| { + dataset.disposition.matches(BlueprintDatasetFilter::InService) + }) { + editor + .expunge(&dataset.pool.id(), &dataset.kind) + .expect("expunged dataset"); + + let new_dataset = PartialDatasetConfig { + name: DatasetName::new( + dataset.pool.clone(), + dataset.kind.clone(), + ), + address: dataset.address, + quota: dataset.quota, + reservation: dataset.reservation, + compression: dataset.compression, + }; + let new_dataset = editor.ensure_in_service(new_dataset, &mut rng); + assert_ne!(dataset.id, new_dataset.id); + } + } + + #[proptest] + fn proptest_add_same_kind_after_expunging_by_zpool( + initial: ZpoolsWithMixedDatasets, + rng_seed: u32, + ) { + let config = initial.into_config(); + let all_zpools = config + .datasets + .values() + .map(|dataset| dataset.pool.id()) + .collect::>(); + let mut editor = DatasetsEditor::new( + config.clone(), + DatasetIdsBackfillFromDb::empty(), + ) + .expect("built editor"); + + let mut rng = PlannerRng::from_seed(( + rng_seed, + "proptest_add_same_kind_after_expunging", + )); + + // Expunge all datasets on all zpools, by zpool. + for zpool_id in &all_zpools { + editor.expunge_all_on_zpool(zpool_id); + // There should no longer be any in-service datasets on this zpool. + assert!( + !editor + .datasets(BlueprintDatasetFilter::InService) + .any(|dataset| dataset.pool.id() == *zpool_id), + "in-service dataset remains after expunging zpool" + ); + } + + // For each originally-in-service dataset: + // + // 1. Add a new dataset of the same kind + // 2. Ensure the new dataset ID is freshly-generated + for dataset in config.datasets.values().filter(|dataset| { + dataset.disposition.matches(BlueprintDatasetFilter::InService) + }) { + let new_dataset = PartialDatasetConfig { + name: DatasetName::new( + dataset.pool.clone(), + dataset.kind.clone(), + ), + address: dataset.address, + quota: dataset.quota, + reservation: dataset.reservation, + compression: dataset.compression, + }; + let new_dataset = editor.ensure_in_service(new_dataset, &mut rng); + assert_ne!(dataset.id, new_dataset.id); + } + + // Repeat the test above, but this time assume all the dataset IDs were + // also present in the backfill database map. We should not reuse IDs + // after expunging zones. 
+ let database_backfill = { + let mut by_zpool: BTreeMap<_, BTreeMap<_, _>> = BTreeMap::new(); + for dataset in config.datasets.values().filter(|dataset| { + dataset.disposition.matches(BlueprintDatasetFilter::InService) + }) { + let prev = by_zpool + .entry(dataset.pool.id()) + .or_default() + .insert(dataset.kind.clone(), dataset.id); + assert!( + prev.is_none(), + "duplicate (pool,kind) in-service input" + ); + } + DatasetIdsBackfillFromDb(by_zpool) + }; + let mut editor = DatasetsEditor::new(config.clone(), database_backfill) + .expect("built editor"); + for zpool_id in &all_zpools { + editor.expunge_all_on_zpool(zpool_id); + // There should no longer be any in-service datasets on this zpool. + assert!( + !editor + .datasets(BlueprintDatasetFilter::InService) + .any(|dataset| dataset.pool.id() == *zpool_id), + "in-service dataset remains after expunging zpool" + ); + } + for dataset in config.datasets.values().filter(|dataset| { + dataset.disposition.matches(BlueprintDatasetFilter::InService) + }) { + let new_dataset = PartialDatasetConfig { + name: DatasetName::new( + dataset.pool.clone(), + dataset.kind.clone(), + ), + address: dataset.address, + quota: dataset.quota, + reservation: dataset.reservation, + compression: dataset.compression, + }; + let new_dataset = editor.ensure_in_service(new_dataset, &mut rng); + assert_ne!(dataset.id, new_dataset.id); + } + } +} diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/disks.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/disks.rs new file mode 100644 index 0000000000..f7c0dcba36 --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/disks.rs @@ -0,0 +1,145 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
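As a recap of the ID-resolution order implemented by DatasetsEditor::ensure_in_service above (reuse an ID the blueprint already tracks, else fall back to a database-backfilled ID unless that ID was expunged, else mint a fresh ID), here is a condensed, std-only sketch; the key and ID types and the next_id callback are illustrative stand-ins, not the crate's API.

use std::collections::{BTreeMap, BTreeSet};

type ZpoolId = u64;
type DatasetKind = &'static str;
type DatasetId = u64;

fn resolve_dataset_id(
    in_service: &BTreeMap<(ZpoolId, DatasetKind), DatasetId>,
    db_backfill: &BTreeMap<(ZpoolId, DatasetKind), DatasetId>,
    expunged: &BTreeSet<DatasetId>,
    key: (ZpoolId, DatasetKind),
    next_id: &mut impl FnMut() -> DatasetId,
) -> DatasetId {
    if let Some(&id) = in_service.get(&key) {
        // 1. The blueprint already tracks this dataset: reuse its ID.
        id
    } else if let Some(&id) = db_backfill.get(&key) {
        // 2. Fall back to an ID recorded in the database before blueprints
        //    tracked datasets, but never resurrect an expunged ID.
        if expunged.contains(&id) {
            next_id()
        } else {
            id
        }
    } else {
        // 3. Otherwise mint a fresh ID.
        next_id()
    }
}

fn main() {
    let in_service = BTreeMap::from([((1, "debug"), 10)]);
    let db_backfill = BTreeMap::from([((1, "zone_root"), 20), ((2, "debug"), 30)]);
    let expunged = BTreeSet::from([30]);
    let mut counter = 100;
    let mut next_id = || {
        counter += 1;
        counter
    };

    // Blueprint ID wins when present.
    assert_eq!(resolve_dataset_id(&in_service, &db_backfill, &expunged, (1, "debug"), &mut next_id), 10);
    // A backfilled database ID is used when the blueprint has no entry.
    assert_eq!(resolve_dataset_id(&in_service, &db_backfill, &expunged, (1, "zone_root"), &mut next_id), 20);
    // An expunged backfill ID is never reused; a fresh ID is generated.
    assert_eq!(resolve_dataset_id(&in_service, &db_backfill, &expunged, (2, "debug"), &mut next_id), 101);
}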
+ +use crate::blueprint_builder::EditCounts; +use nexus_types::deployment::BlueprintPhysicalDiskConfig; +use nexus_types::deployment::BlueprintPhysicalDiskDisposition; +use nexus_types::deployment::BlueprintPhysicalDisksConfig; +use nexus_types::deployment::DiskFilter; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::PhysicalDiskUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; + +#[derive(Debug, thiserror::Error)] +pub enum DisksEditError { + #[error("tried to expunge nonexistent disk {id}")] + ExpungeNonexistentDisk { id: PhysicalDiskUuid }, +} + +#[derive(Debug, thiserror::Error)] +#[error( + "invalid blueprint input: duplicate disk ID {id} \ + (zpools: {zpool1:?}, {zpool2:?})" +)] +pub struct DuplicateDiskId { + pub id: PhysicalDiskUuid, + pub zpool1: ZpoolUuid, + pub zpool2: ZpoolUuid, +} + +#[derive(Debug)] +pub(super) struct DisksEditor { + generation: Generation, + disks: BTreeMap, + counts: EditCounts, +} + +impl DisksEditor { + pub fn empty() -> Self { + Self { + generation: Generation::new(), + disks: BTreeMap::new(), + counts: EditCounts::zeroes(), + } + } + + pub fn finalize(self) -> (BlueprintPhysicalDisksConfig, EditCounts) { + let mut generation = self.generation; + if self.counts.has_nonzero_counts() { + generation = generation.next(); + } + + ( + BlueprintPhysicalDisksConfig { + generation, + disks: self.disks.into_values().collect(), + }, + self.counts, + ) + } + + pub fn edit_counts(&self) -> EditCounts { + self.counts + } + + pub fn disks( + &self, + filter: DiskFilter, + ) -> impl Iterator { + self.disks + .values() + .filter(move |config| config.disposition.matches(filter)) + } + + pub fn contains_zpool(&self, zpool_id: &ZpoolUuid) -> bool { + self.disks.values().any(|disk| disk.pool_id == *zpool_id) + } + + pub fn ensure(&mut self, disk: BlueprintPhysicalDiskConfig) { + match self.disks.entry(disk.id) { + Entry::Vacant(slot) => { + slot.insert(disk); + self.counts.added += 1; + } + Entry::Occupied(mut slot) => { + if *slot.get() != disk { + slot.insert(disk); + self.counts.updated += 1; + } + } + } + } + + pub fn expunge( + &mut self, + disk_id: &PhysicalDiskUuid, + ) -> Result { + let config = self.disks.get_mut(disk_id).ok_or_else(|| { + DisksEditError::ExpungeNonexistentDisk { id: *disk_id } + })?; + + match config.disposition { + BlueprintPhysicalDiskDisposition::InService => { + config.disposition = BlueprintPhysicalDiskDisposition::Expunged; + self.counts.expunged += 1; + } + BlueprintPhysicalDiskDisposition::Expunged => { + // expunge is idempotent; do nothing + } + } + + Ok(config.pool_id) + } +} + +impl TryFrom for DisksEditor { + type Error = DuplicateDiskId; + + fn try_from( + config: BlueprintPhysicalDisksConfig, + ) -> Result { + let mut disks = BTreeMap::new(); + for disk in config.disks { + match disks.entry(disk.id) { + Entry::Vacant(slot) => { + slot.insert(disk); + } + Entry::Occupied(prev) => { + return Err(DuplicateDiskId { + id: disk.id, + zpool1: disk.pool_id, + zpool2: prev.get().pool_id, + }); + } + } + } + Ok(Self { + generation: config.generation, + disks, + counts: EditCounts::zeroes(), + }) + } +} diff --git a/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/zones.rs b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/zones.rs new file mode 100644 index 0000000000..5a5c7a1807 --- /dev/null +++ b/nexus/reconfigurator/planning/src/blueprint_editor/sled_editor/zones.rs @@ -0,0 +1,181 @@ +// This Source Code Form is subject to 
the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use crate::blueprint_builder::EditCounts; +use nexus_sled_agent_shared::inventory::ZoneKind; +use nexus_types::deployment::BlueprintZoneConfig; +use nexus_types::deployment::BlueprintZoneDisposition; +use nexus_types::deployment::BlueprintZoneFilter; +use nexus_types::deployment::BlueprintZonesConfig; +use omicron_common::api::external::Generation; +use omicron_uuid_kinds::OmicronZoneUuid; +use omicron_uuid_kinds::ZpoolUuid; +use std::collections::btree_map::Entry; +use std::collections::BTreeMap; + +#[derive(Debug, thiserror::Error)] +pub enum ZonesEditError { + #[error( + "tried to add duplicate zone ID {id} (kinds: {kind1:?}, {kind2:?})" + )] + AddDuplicateZoneId { id: OmicronZoneUuid, kind1: ZoneKind, kind2: ZoneKind }, + #[error("tried to expunge nonexistent zone {id}")] + ExpungeNonexistentZone { id: OmicronZoneUuid }, +} + +#[derive(Debug, thiserror::Error)] +#[error( + "invalid blueprint input: duplicate zone ID {id} \ + (kinds: {kind1:?}, {kind2:?})" +)] +pub struct DuplicateZoneId { + pub id: OmicronZoneUuid, + pub kind1: ZoneKind, + pub kind2: ZoneKind, +} + +#[derive(Debug)] +pub(super) struct ZonesEditor { + generation: Generation, + zones: BTreeMap, + counts: EditCounts, +} + +impl ZonesEditor { + pub fn empty() -> Self { + Self { + generation: Generation::new(), + zones: BTreeMap::new(), + counts: EditCounts::zeroes(), + } + } + + pub fn finalize(self) -> (BlueprintZonesConfig, EditCounts) { + let mut generation = self.generation; + if self.counts.has_nonzero_counts() { + generation = generation.next(); + } + let mut config = BlueprintZonesConfig { + generation, + zones: self.zones.into_values().collect(), + }; + config.sort(); + (config, self.counts) + } + + pub fn edit_counts(&self) -> EditCounts { + self.counts + } + + pub fn zones( + &self, + filter: BlueprintZoneFilter, + ) -> impl Iterator { + self.zones + .values() + .filter(move |config| config.disposition.matches(filter)) + } + + pub fn add_zone( + &mut self, + zone: BlueprintZoneConfig, + ) -> Result<(), ZonesEditError> { + match self.zones.entry(zone.id) { + Entry::Vacant(slot) => { + slot.insert(zone); + self.counts.added += 1; + Ok(()) + } + Entry::Occupied(prev) => { + // We shouldn't be trying to add zones that already exist -- + // something went wrong in the planner logic. + Err(ZonesEditError::AddDuplicateZoneId { + id: zone.id, + kind1: zone.zone_type.kind(), + kind2: prev.get().zone_type.kind(), + }) + } + } + } + + /// Expunge a zone, returning `true` if the zone was expunged and `false` if + /// the zone was already expunged, along with the updated zone config. 
+ pub fn expunge( + &mut self, + zone_id: &OmicronZoneUuid, + ) -> Result<(bool, &BlueprintZoneConfig), ZonesEditError> { + let config = self.zones.get_mut(zone_id).ok_or_else(|| { + ZonesEditError::ExpungeNonexistentZone { id: *zone_id } + })?; + + let did_expunge = Self::expunge_impl(config, &mut self.counts); + + Ok((did_expunge, &*config)) + } + + fn expunge_impl( + config: &mut BlueprintZoneConfig, + counts: &mut EditCounts, + ) -> bool { + match config.disposition { + BlueprintZoneDisposition::InService + | BlueprintZoneDisposition::Quiesced => { + config.disposition = BlueprintZoneDisposition::Expunged; + counts.expunged += 1; + true + } + BlueprintZoneDisposition::Expunged => { + // expunge is idempotent; do nothing + false + } + } + } + + pub fn expunge_all_on_zpool(&mut self, zpool: &ZpoolUuid) { + for config in self.zones.values_mut() { + // Expunge this zone if its filesystem or durable dataset are on + // this zpool. (If it has both, they should be on the _same_ zpool, + // but that's not strictly required by this method - we'll expunge a + // zone that depends on this zpool in any way.) + let fs_is_on_zpool = config + .filesystem_pool + .as_ref() + .map_or(false, |pool| pool.id() == *zpool); + let dd_is_on_zpool = config + .zone_type + .durable_zpool() + .map_or(false, |pool| pool.id() == *zpool); + if fs_is_on_zpool || dd_is_on_zpool { + Self::expunge_impl(config, &mut self.counts); + } + } + } +} + +impl TryFrom for ZonesEditor { + type Error = DuplicateZoneId; + + fn try_from(config: BlueprintZonesConfig) -> Result { + let mut zones = BTreeMap::new(); + for zone in config.zones { + match zones.entry(zone.id) { + Entry::Vacant(slot) => { + slot.insert(zone); + } + Entry::Occupied(prev) => { + return Err(DuplicateZoneId { + id: zone.id, + kind1: zone.zone_type.kind(), + kind2: prev.get().zone_type.kind(), + }); + } + } + } + Ok(Self { + generation: config.generation, + zones, + counts: EditCounts::zeroes(), + }) + } +} diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index 3848934d19..dfba3f9992 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -453,9 +453,7 @@ impl ExampleSystemBuilder { .unwrap(); } } - builder - .sled_ensure_zone_datasets(sled_id, &sled_resources) - .unwrap(); + builder.sled_ensure_zone_datasets(sled_id).unwrap(); } let blueprint = builder.build(); diff --git a/nexus/reconfigurator/planning/src/lib.rs b/nexus/reconfigurator/planning/src/lib.rs index a5a47c933d..f6c521c0f8 100644 --- a/nexus/reconfigurator/planning/src/lib.rs +++ b/nexus/reconfigurator/planning/src/lib.rs @@ -7,6 +7,7 @@ //! See docs/reconfigurator.adoc for an overview. 
pub mod blueprint_builder; +pub mod blueprint_editor; pub mod example; mod ip_allocator; pub mod planner; diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index 9bdb29048b..56fc671667 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -160,7 +160,7 @@ impl<'a> Planner<'a> { if all_zones_expunged && num_instances_assigned == 0 { self.blueprint - .set_sled_state(sled_id, SledState::Decommissioned); + .set_sled_state(sled_id, SledState::Decommissioned)?; } } @@ -362,17 +362,13 @@ impl<'a> Planner<'a> { } fn do_plan_datasets(&mut self) -> Result<(), Error> { - for (sled_id, sled_resources) in - self.input.all_sled_resources(SledFilter::InService) - { + for sled_id in self.input.all_sled_ids(SledFilter::InService) { if let EnsureMultiple::Changed { added, updated, expunged, removed, - } = self - .blueprint - .sled_ensure_zone_datasets(sled_id, &sled_resources)? + } = self.blueprint.sled_ensure_zone_datasets(sled_id)? { info!( &self.log, diff --git a/nexus/src/app/metrics.rs b/nexus/src/app/metrics.rs index 40f7882281..5b77e681b1 100644 --- a/nexus/src/app/metrics.rs +++ b/nexus/src/app/metrics.rs @@ -140,28 +140,52 @@ impl super::Nexus { self.timeseries_client .oxql_query(query) .await - .map(|result| { - // TODO-observability: The query method returns information - // about the duration of the OxQL query and the database - // resource usage for each contained SQL query. We should - // publish this as a timeseries itself, so that we can track - // improvements to query processing. - // - // For now, simply return the tables alone. - result.tables - }) - .map_err(|e| match e { - oximeter_db::Error::DatabaseUnavailable(_) - | oximeter_db::Error::Connection(_) => { - Error::ServiceUnavailable { - internal_message: e.to_string(), - } - } - oximeter_db::Error::Oxql(_) - | oximeter_db::Error::TimeseriesNotFound(_) => { - Error::invalid_request(e.to_string()) - } - _ => Error::InternalError { internal_message: e.to_string() }, - }) + // TODO-observability: The query method returns information + // about the duration of the OxQL query and the database + // resource usage for each contained SQL query. We should + // publish this as a timeseries itself, so that we can track + // improvements to query processing. + // + // For now, simply return the tables alone. + .map(|result| result.tables) + .map_err(map_timeseries_err) + } + + /// Run an OxQL query against the timeseries database, scoped to a specific project. 
+ pub(crate) async fn timeseries_query_project( + &self, + _opctx: &OpContext, + project_lookup: &lookup::Project<'_>, + query: impl AsRef, + ) -> Result, Error> { + // Ensure the user has read access to the project + let (authz_silo, authz_project) = + project_lookup.lookup_for(authz::Action::Read).await?; + + // Ensure the query only refers to the project + let filtered_query = format!( + "{} | filter silo_id == \"{}\" && project_id == \"{}\"", + query.as_ref(), + authz_silo.id(), + authz_project.id() + ); + + self.timeseries_client + .oxql_query(filtered_query) + .await + .map(|result| result.tables) + .map_err(map_timeseries_err) + } +} + +fn map_timeseries_err(e: oximeter_db::Error) -> Error { + match e { + oximeter_db::Error::DatabaseUnavailable(_) + | oximeter_db::Error::Connection(_) => Error::unavail(&e.to_string()), + oximeter_db::Error::Oxql(_) + | oximeter_db::Error::TimeseriesNotFound(_) => { + Error::invalid_request(e.to_string()) + } + _ => Error::internal_error(&e.to_string()), } } diff --git a/nexus/src/app/sagas/region_snapshot_replacement_start.rs b/nexus/src/app/sagas/region_snapshot_replacement_start.rs index 55927f7de8..4855f64ac2 100644 --- a/nexus/src/app/sagas/region_snapshot_replacement_start.rs +++ b/nexus/src/app/sagas/region_snapshot_replacement_start.rs @@ -1041,6 +1041,7 @@ pub(crate) mod test { let conn = datastore.pool_connection_for_tests().await.unwrap(); + #[allow(clippy::disallowed_methods)] conn.transaction_async(|conn| async move { // Selecting all regions requires a full table scan conn.batch_execute_async(ALLOW_FULL_TABLE_SCAN_SQL).await.unwrap(); diff --git a/nexus/src/external_api/http_entrypoints.rs b/nexus/src/external_api/http_entrypoints.rs index a285542442..740895b7e4 100644 --- a/nexus/src/external_api/http_entrypoints.rs +++ b/nexus/src/external_api/http_entrypoints.rs @@ -5544,6 +5544,33 @@ impl NexusExternalApi for NexusExternalApiImpl { .await } + async fn timeseries_query( + rqctx: RequestContext, + query_params: Query, + body: TypedBody, + ) -> Result, HttpError> { + let apictx = rqctx.context(); + let handler = async { + let nexus = &apictx.context.nexus; + let opctx = + crate::context::op_context_for_external_api(&rqctx).await?; + let project_selector = query_params.into_inner(); + let query = body.into_inner().query; + let project_lookup = + nexus.project_lookup(&opctx, project_selector)?; + nexus + .timeseries_query_project(&opctx, &project_lookup, &query) + .await + .map(|tables| HttpResponseOk(views::OxqlQueryResult { tables })) + .map_err(HttpError::from) + }; + apictx + .context + .external_latencies + .instrument_dropshot_handler(&rqctx, handler) + .await + } + // Updates async fn system_update_put_repository( diff --git a/nexus/tests/integration_tests/endpoints.rs b/nexus/tests/integration_tests/endpoints.rs index 2e7b68eaca..466cae17a8 100644 --- a/nexus/tests/integration_tests/endpoints.rs +++ b/nexus/tests/integration_tests/endpoints.rs @@ -948,10 +948,14 @@ pub static DEMO_SILO_METRICS_URL: Lazy = Lazy::new(|| { ) }); -pub static TIMESERIES_LIST_URL: Lazy = +pub static TIMESERIES_QUERY_URL: Lazy = Lazy::new(|| { + format!("/v1/timeseries/query?project={}", *DEMO_PROJECT_NAME) +}); + +pub static SYSTEM_TIMESERIES_LIST_URL: Lazy = Lazy::new(|| String::from("/v1/system/timeseries/schemas")); -pub static TIMESERIES_QUERY_URL: Lazy = +pub static SYSTEM_TIMESERIES_QUERY_URL: Lazy = Lazy::new(|| String::from("/v1/system/timeseries/query")); pub static DEMO_TIMESERIES_QUERY: Lazy = @@ -2208,7 +2212,18 @@ pub static 
VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &TIMESERIES_LIST_URL, + url: &TIMESERIES_QUERY_URL, + visibility: Visibility::Protected, + unprivileged_access: UnprivilegedAccess::None, + allowed_methods: vec![ + AllowedMethod::Post( + serde_json::to_value(&*DEMO_TIMESERIES_QUERY).unwrap() + ), + ], + }, + + VerifyEndpoint { + url: &SYSTEM_TIMESERIES_LIST_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ @@ -2217,7 +2232,7 @@ pub static VERIFY_ENDPOINTS: Lazy> = Lazy::new(|| { }, VerifyEndpoint { - url: &TIMESERIES_QUERY_URL, + url: &SYSTEM_TIMESERIES_QUERY_URL, visibility: Visibility::Public, unprivileged_access: UnprivilegedAccess::None, allowed_methods: vec![ diff --git a/nexus/tests/integration_tests/metrics.rs b/nexus/tests/integration_tests/metrics.rs index a468fa23d5..7e5441c16a 100644 --- a/nexus/tests/integration_tests/metrics.rs +++ b/nexus/tests/integration_tests/metrics.rs @@ -9,16 +9,20 @@ use crate::integration_tests::instances::{ }; use chrono::Utc; use dropshot::test_util::ClientTestContext; -use dropshot::ResultsPage; +use dropshot::{HttpErrorResponseBody, ResultsPage}; use http::{Method, StatusCode}; +use nexus_auth::authn::USER_TEST_UNPRIVILEGED; +use nexus_db_queries::db::identity::Asset; +use nexus_test_utils::background::activate_background_task; use nexus_test_utils::http_testing::{AuthnMode, NexusRequest, RequestBuilder}; use nexus_test_utils::resource_helpers::{ create_default_ip_pool, create_disk, create_instance, create_project, - objects_list_page_authz, DiskTest, + grant_iam, object_create_error, objects_list_page_authz, DiskTest, }; use nexus_test_utils::wait_for_producer; use nexus_test_utils::ControlPlaneTestContext; use nexus_test_utils_macros::nexus_test; +use nexus_types::external_api::shared::ProjectRole; use nexus_types::external_api::views::OxqlQueryResult; use nexus_types::silo::DEFAULT_SILO_ID; use omicron_test_utils::dev::poll::{wait_for_condition, CondCheckError}; @@ -266,7 +270,7 @@ async fn test_metrics( /// Test that we can correctly list some timeseries schema. #[nexus_test] -async fn test_timeseries_schema_list( +async fn test_system_timeseries_schema_list( cptestctx: &ControlPlaneTestContext, ) { // Nexus registers itself as a metric producer on startup, with its own UUID @@ -297,10 +301,73 @@ async fn test_timeseries_schema_list( .expect("Failed to find HTTP request latency histogram schema"); } -pub async fn timeseries_query( +/// Run an OxQL query until it succeeds or panics. +pub async fn system_timeseries_query( cptestctx: &ControlPlaneTestContext, query: impl ToString, ) -> Vec { + timeseries_query_until_success( + cptestctx, + "/v1/system/timeseries/query", + query, + ) + .await +} + +/// Run a project-scoped OxQL query until it succeeds or panics. +pub async fn project_timeseries_query( + cptestctx: &ControlPlaneTestContext, + project: &str, + query: impl ToString, +) -> Vec { + timeseries_query_until_success( + cptestctx, + &format!("/v1/timeseries/query?project={}", project), + query, + ) + .await +} + +/// Run an OxQL query until it succeeds or panics. 
+async fn timeseries_query_until_success( + cptestctx: &ControlPlaneTestContext, + endpoint: &str, + query: impl ToString, +) -> Vec { + const POLL_INTERVAL: Duration = Duration::from_secs(1); + const POLL_MAX: Duration = Duration::from_secs(30); + let query_ = query.to_string(); + wait_for_condition( + || async { + match execute_timeseries_query(cptestctx, endpoint, &query_).await { + Some(r) => Ok(r), + None => Err(CondCheckError::<()>::NotYet), + } + }, + &POLL_INTERVAL, + &POLL_MAX, + ) + .await + .unwrap_or_else(|_| { + panic!( + "Timeseries named in query are not available \ + after {:?}, query: '{}'", + POLL_MAX, + query.to_string(), + ) + }) +} + +/// Run an OxQL query. +/// +/// This returns `None` if the query resulted in client error and the body +/// indicates that a timeseries named in the query could not be found. In all +/// other cases, it either succeeds or panics. +pub async fn execute_timeseries_query( + cptestctx: &ControlPlaneTestContext, + endpoint: &str, + query: impl ToString, +) -> Option> { // first, make sure the latest timeseries have been collected. cptestctx .oximeter @@ -317,7 +384,7 @@ pub async fn timeseries_query( nexus_test_utils::http_testing::RequestBuilder::new( &cptestctx.external_client, http::Method::POST, - "/v1/system/timeseries/query", + endpoint, ) .body(Some(&body)), ) @@ -327,14 +394,29 @@ pub async fn timeseries_query( .unwrap_or_else(|e| { panic!("timeseries query failed: {e:?}\nquery: {query}") }); - rsp.parsed_body::() - .unwrap_or_else(|e| { - panic!( - "could not parse timeseries query response: {e:?}\n\ - query: {query}\nresponse: {rsp:#?}" - ); - }) - .tables + + // Check for a timeseries-not-found error specifically. + if rsp.status.is_client_error() { + let text = std::str::from_utf8(&rsp.body) + .expect("Timeseries query response body should be UTF-8"); + if text.contains("Schema for timeseries") && text.contains("not found") + { + return None; + } + } + + // Try to parse the query as usual, which will fail on other kinds of + // errors. + Some( + rsp.parsed_body::() + .unwrap_or_else(|e| { + panic!( + "could not parse timeseries query response: {e:?}\n\ + query: {query}\nresponse: {rsp:#?}" + ); + }) + .tables, + ) } #[nexus_test] @@ -441,7 +523,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -457,7 +539,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -474,7 +556,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -499,7 +581,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. 
activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -528,7 +610,7 @@ async fn test_instance_watcher_metrics( // activate the instance watcher background task. activate_instance_watcher().await; - let metrics = timeseries_query(&cptestctx, OXQL_QUERY).await; + let metrics = system_timeseries_query(&cptestctx, OXQL_QUERY).await; let checks = metrics .iter() .find(|t| t.name() == "virtual_machine:check") @@ -548,6 +630,134 @@ async fn test_instance_watcher_metrics( assert_gte!(ts2_running, 2); } +#[nexus_test] +async fn test_project_timeseries_query( + cptestctx: &ControlPlaneTestContext, +) { + let client = &cptestctx.external_client; + + create_default_ip_pool(&client).await; // needed for instance create to work + + // Create two projects + let p1 = create_project(&client, "project1").await; + let _p2 = create_project(&client, "project2").await; + + // Create resources in each project + let i1 = create_instance(&client, "project1", "instance1").await; + let _i2 = create_instance(&client, "project2", "instance2").await; + + let internal_client = &cptestctx.internal_client; + + // get the instance metrics to show up + let _ = + activate_background_task(&internal_client, "instance_watcher").await; + + // Query with no project specified + let q1 = "get virtual_machine:check"; + + let result = project_timeseries_query(&cptestctx, "project1", q1).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + // also works with project ID + let result = + project_timeseries_query(&cptestctx, &p1.identity.id.to_string(), q1) + .await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + let result = project_timeseries_query(&cptestctx, "project2", q1).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + // with project specified + let q2 = &format!("{} | filter project_id == \"{}\"", q1, p1.identity.id); + + let result = project_timeseries_query(&cptestctx, "project1", q2).await; + assert_eq!(result.len(), 1); + assert!(result[0].timeseries().len() > 0); + + let result = project_timeseries_query(&cptestctx, "project2", q2).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 0); + + // with instance specified + let q3 = &format!("{} | filter instance_id == \"{}\"", q1, i1.identity.id); + + // project containing instance gives me something + let result = project_timeseries_query(&cptestctx, "project1", q3).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 1); + + // should be empty or error + let result = project_timeseries_query(&cptestctx, "project2", q3).await; + assert_eq!(result.len(), 1); + assert_eq!(result[0].timeseries().len(), 0); + + // expect error when querying a metric that has no project_id on it + let q4 = "get integration_target:integration_metric"; + let url = "/v1/timeseries/query?project=project1"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q4.to_string(), + }; + let result = + object_create_error(client, url, &body, StatusCode::BAD_REQUEST).await; + assert_eq!(result.error_code.unwrap(), "InvalidRequest"); + // Notable that the error confirms that the metric exists and says what the + // fields are. 
This is helpful generally, but here it would be better if + // we could say something more like "you can't query this timeseries from + // this endpoint" + assert_eq!(result.message, "The filter expression contains identifiers that are not valid for its input timeseries. Invalid identifiers: [\"project_id\", \"silo_id\"], timeseries fields: {\"datum\", \"metric_name\", \"target_name\", \"timestamp\"}"); + + // nonexistent project + let url = "/v1/timeseries/query?project=nonexistent"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q4.to_string(), + }; + let result = + object_create_error(client, url, &body, StatusCode::NOT_FOUND).await; + assert_eq!(result.message, "not found: project with name \"nonexistent\""); + + // unprivileged user gets 404 on project that exists, but which they can't read + let url = "/v1/timeseries/query?project=project1"; + let body = nexus_types::external_api::params::TimeseriesQuery { + query: q1.to_string(), + }; + + let request = RequestBuilder::new(client, Method::POST, url) + .body(Some(&body)) + .expect_status(Some(StatusCode::NOT_FOUND)); + let result = NexusRequest::new(request) + .authn_as(AuthnMode::UnprivilegedUser) + .execute() + .await + .unwrap() + .parsed_body::() + .unwrap(); + assert_eq!(result.message, "not found: project with name \"project1\""); + + // now grant the user access to that project only + grant_iam( + client, + "/v1/projects/project1", + ProjectRole::Viewer, + USER_TEST_UNPRIVILEGED.id(), + AuthnMode::PrivilegedUser, + ) + .await; + + // now they can access the timeseries. how cool is that + let request = RequestBuilder::new(client, Method::POST, url) + .body(Some(&body)) + .expect_status(Some(StatusCode::OK)); + let result = NexusRequest::new(request) + .authn_as(AuthnMode::UnprivilegedUser) + .execute_and_parse_unwrap::() + .await; + assert_eq!(result.tables.len(), 1); + assert_eq!(result.tables[0].timeseries().len(), 1); +} + #[nexus_test] async fn test_mgs_metrics( cptestctx: &ControlPlaneTestContext, @@ -714,7 +924,7 @@ async fn test_mgs_metrics( .try_force_collect() .await .expect("Could not force oximeter collection"); - let table = timeseries_query(&cptestctx, &query) + let table = system_timeseries_query(&cptestctx, &query) .await .into_iter() .find(|t| t.name() == name) diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index 5f21652feb..8990b0b83b 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -21,6 +21,7 @@ dropshot.workspace = true futures.workspace = true http.workspace = true humantime.workspace = true +illumos-utils.workspace = true ipnetwork.workspace = true newtype_derive.workspace = true omicron-uuid-kinds.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 3a17f69863..a487fea2ce 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -736,6 +736,17 @@ impl BlueprintZoneConfig { pub fn underlay_ip(&self) -> Ipv6Addr { self.zone_type.underlay_ip() } + + /// Returns the dataset used for the the zone's (transient) root filesystem. 
+ pub fn filesystem_dataset(&self) -> Option { + let pool_name = self.filesystem_pool.clone()?; + let name = illumos_utils::zone::zone_name( + self.zone_type.kind().zone_prefix(), + Some(self.id), + ); + let kind = DatasetKind::TransientZone { name }; + Some(DatasetName::new(pool_name, kind)) + } } impl From for OmicronZoneConfig { @@ -917,6 +928,26 @@ pub enum BlueprintPhysicalDiskDisposition { Expunged, } +impl BlueprintPhysicalDiskDisposition { + /// Returns true if the disk disposition matches this filter. + pub fn matches(self, filter: DiskFilter) -> bool { + match self { + Self::InService => match filter { + DiskFilter::All => true, + DiskFilter::InService => true, + // TODO remove this variant? + DiskFilter::ExpungedButActive => false, + }, + Self::Expunged => match filter { + DiskFilter::All => true, + DiskFilter::InService => false, + // TODO remove this variant? + DiskFilter::ExpungedButActive => true, + }, + } + } +} + /// Information about an Omicron physical disk as recorded in a bluerprint. #[derive(Clone, Debug, Deserialize, Serialize, JsonSchema, PartialEq, Eq)] pub struct BlueprintPhysicalDiskConfig { diff --git a/openapi/nexus.json b/openapi/nexus.json index 79186e379a..c0b6a96fcf 100644 --- a/openapi/nexus.json +++ b/openapi/nexus.json @@ -8890,6 +8890,55 @@ } } }, + "/v1/timeseries/query": { + "post": { + "tags": [ + "hidden" + ], + "summary": "Run project-scoped timeseries query", + "description": "Queries are written in OxQL. Project must be specified by name or ID in URL query parameter. The OxQL query will only return timeseries data from the specified project.", + "operationId": "timeseries_query", + "parameters": [ + { + "in": "query", + "name": "project", + "description": "Name or ID of the project", + "required": true, + "schema": { + "$ref": "#/components/schemas/NameOrId" + } + } + ], + "requestBody": { + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/TimeseriesQuery" + } + } + }, + "required": true + }, + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/OxqlQueryResult" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + } + }, "/v1/users": { "get": { "tags": [ diff --git a/openapi/oximeter.json b/openapi/oximeter.json index dea3418b8d..b51c56b667 100644 --- a/openapi/oximeter.json +++ b/openapi/oximeter.json @@ -84,6 +84,39 @@ } }, "/producers/{producer_id}": { + "get": { + "summary": "Get details about a producer by ID.", + "operationId": "producer_details", + "parameters": [ + { + "in": "path", + "name": "producer_id", + "required": true, + "schema": { + "type": "string", + "format": "uuid" + } + } + ], + "responses": { + "200": { + "description": "successful operation", + "content": { + "application/json": { + "schema": { + "$ref": "#/components/schemas/ProducerDetails" + } + } + } + }, + "4XX": { + "$ref": "#/components/responses/Error" + }, + "5XX": { + "$ref": "#/components/responses/Error" + } + } + }, "delete": { "summary": "Delete a producer by ID.", "operationId": "producer_delete", @@ -171,6 +204,114 @@ "request_id" ] }, + "FailedCollection": { + "description": "Details about a previous failed collection.", + "type": "object", + "properties": { + "reason": { + "description": "The reason the collection failed.", + "type": "string" + }, + "started_at": { + "description": "The time at which we started a collection.\n\nNote that 
this is the time we queued a request to collect for processing by a background task. The `time_queued` can be added to this time to figure out when processing began, and `time_collecting` can be added to that to figure out how long the actual collection process took.", + "type": "string", + "format": "date-time" + }, + "time_collecting": { + "description": "The time it took for the actual collection.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, + "time_queued": { + "description": "The time this request spent queued before being processed.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + } + }, + "required": [ + "reason", + "started_at", + "time_collecting", + "time_queued" + ] + }, + "ProducerDetails": { + "type": "object", + "properties": { + "address": { + "description": "The current collection address.", + "type": "string" + }, + "id": { + "description": "The producer's ID.", + "type": "string", + "format": "uuid" + }, + "interval": { + "description": "The current collection interval.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, + "last_failure": { + "nullable": true, + "description": "Details about the last failed collection.\n\nThis is None if we've never failed to collect from the producer.", + "allOf": [ + { + "$ref": "#/components/schemas/FailedCollection" + } + ] + }, + "last_success": { + "nullable": true, + "description": "Details about the last successful collection.\n\nThis is None if we've never successfully collected from the producer.", + "allOf": [ + { + "$ref": "#/components/schemas/SuccessfulCollection" + } + ] + }, + "n_collections": { + "description": "The total number of successful collections we've made.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "n_failures": { + "description": "The total number of failed collections.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "registered": { + "description": "The time the producer was first registered with us.", + "type": "string", + "format": "date-time" + }, + "updated": { + "description": "The last time the producer's information was updated.", + "type": "string", + "format": "date-time" + } + }, + "required": [ + "address", + "id", + "interval", + "n_collections", + "n_failures", + "registered", + "updated" + ] + }, "ProducerEndpoint": { "description": "Information announced by a metric server, used so that clients can contact it and collect available metric data from it.", "type": "object", @@ -261,6 +402,45 @@ ] } ] + }, + "SuccessfulCollection": { + "description": "Details about a previous successful collection.", + "type": "object", + "properties": { + "n_samples": { + "description": "The number of samples collected.", + "type": "integer", + "format": "uint64", + "minimum": 0 + }, + "started_at": { + "description": "The time at which we started a collection.\n\nNote that this is the time we queued a request to collect for processing by a background task. 
The `time_queued` can be added to this time to figure out when processing began, and `time_collecting` can be added to that to figure out how long the actual collection process took.", + "type": "string", + "format": "date-time" + }, + "time_collecting": { + "description": "The time it took for the actual collection.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + }, + "time_queued": { + "description": "The time this request spent queued before being processed.", + "allOf": [ + { + "$ref": "#/components/schemas/Duration" + } + ] + } + }, + "required": [ + "n_samples", + "started_at", + "time_collecting", + "time_queued" + ] } }, "responses": { diff --git a/oximeter/api/src/lib.rs b/oximeter/api/src/lib.rs index 2231a0cc5d..f47a5ba07e 100644 --- a/oximeter/api/src/lib.rs +++ b/oximeter/api/src/lib.rs @@ -10,6 +10,7 @@ use dropshot::{ use omicron_common::api::internal::nexus::ProducerEndpoint; use schemars::JsonSchema; use serde::{Deserialize, Serialize}; +use std::{net::SocketAddr, time::Duration}; use uuid::Uuid; #[dropshot::api_description] @@ -26,6 +27,16 @@ pub trait OximeterApi { query: Query>, ) -> Result>, HttpError>; + /// Get details about a producer by ID. + #[endpoint { + method = GET, + path = "/producers/{producer_id}", + }] + async fn producer_details( + request_context: RequestContext, + path: dropshot::Path, + ) -> Result, HttpError>; + /// Delete a producer by ID. #[endpoint { method = DELETE, @@ -64,3 +75,120 @@ pub struct CollectorInfo { /// Last time we refreshed our producer list with Nexus. pub last_refresh: Option>, } + +/// Details about a previous successful collection. +#[derive(Clone, Copy, Debug, Deserialize, JsonSchema, Serialize)] +pub struct SuccessfulCollection { + /// The time at which we started a collection. + /// + /// Note that this is the time we queued a request to collect for processing + /// by a background task. The `time_queued` can be added to this time to + /// figure out when processing began, and `time_collecting` can be added to + /// that to figure out how long the actual collection process took. + pub started_at: DateTime, + + /// The time this request spent queued before being processed. + pub time_queued: Duration, + + /// The time it took for the actual collection. + pub time_collecting: Duration, + + /// The number of samples collected. + pub n_samples: u64, +} + +/// Details about a previous failed collection. +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct FailedCollection { + /// The time at which we started a collection. + /// + /// Note that this is the time we queued a request to collect for processing + /// by a background task. The `time_queued` can be added to this time to + /// figure out when processing began, and `time_collecting` can be added to + /// that to figure out how long the actual collection process took. + pub started_at: DateTime, + + /// The time this request spent queued before being processed. + pub time_queued: Duration, + + /// The time it took for the actual collection. + pub time_collecting: Duration, + + /// The reason the collection failed. + pub reason: String, +} + +#[derive(Clone, Debug, Deserialize, JsonSchema, Serialize)] +pub struct ProducerDetails { + /// The producer's ID. + pub id: Uuid, + + /// The current collection interval. + pub interval: Duration, + + /// The current collection address. + pub address: SocketAddr, + + /// The time the producer was first registered with us. 
+ pub registered: DateTime, + + /// The last time the producer's information was updated. + pub updated: DateTime, + + /// Details about the last successful collection. + /// + /// This is None if we've never successfully collected from the producer. + pub last_success: Option, + + /// Details about the last failed collection. + /// + /// This is None if we've never failed to collect from the producer. + pub last_failure: Option, + + /// The total number of successful collections we've made. + pub n_collections: u64, + + /// The total number of failed collections. + pub n_failures: u64, +} + +impl ProducerDetails { + pub fn new(info: &ProducerEndpoint) -> Self { + let now = Utc::now(); + Self { + id: info.id, + interval: info.interval, + address: info.address, + registered: now, + updated: now, + last_success: None, + last_failure: None, + n_collections: 0, + n_failures: 0, + } + } + + /// Update with new producer information. + /// + /// # Panics + /// + /// This panics if the new information refers to a different ID. + pub fn update(&mut self, new: &ProducerEndpoint) { + assert_eq!(self.id, new.id); + self.updated = Utc::now(); + self.address = new.address; + self.interval = new.interval; + } + + /// Update when we successfully complete a collection. + pub fn on_success(&mut self, success: SuccessfulCollection) { + self.last_success = Some(success); + self.n_collections += 1; + } + + /// Update when we fail to complete a collection. + pub fn on_failure(&mut self, failure: FailedCollection) { + self.last_failure = Some(failure); + self.n_failures += 1; + } +} diff --git a/oximeter/collector/src/agent.rs b/oximeter/collector/src/agent.rs index 6fa8c01c56..e924cb2ee3 100644 --- a/oximeter/collector/src/agent.rs +++ b/oximeter/collector/src/agent.rs @@ -4,8 +4,12 @@ //! The oximeter agent handles collection tasks for each producer. -// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company +use crate::collection_task::CollectionTaskHandle; +use crate::collection_task::CollectionTaskOutput; +use crate::collection_task::ForcedCollectionError; +use crate::results_sink; use crate::self_stats; use crate::DbConfig; use crate::Error; @@ -18,8 +22,7 @@ use nexus_client::types::IdSortMode; use nexus_client::Client as NexusClient; use omicron_common::backoff; use omicron_common::backoff::BackoffError; -use oximeter::types::ProducerResults; -use oximeter::types::ProducerResultsItem; +use oximeter_api::ProducerDetails; use oximeter_db::Client; use oximeter_db::DbWrite; use qorb::claim::Handle; @@ -41,613 +44,10 @@ use std::sync::Arc; use std::sync::Mutex as StdMutex; use std::time::Duration; use tokio::sync::mpsc; -use tokio::sync::mpsc::error::TrySendError; -use tokio::sync::oneshot; -use tokio::sync::watch; use tokio::sync::Mutex; use tokio::sync::MutexGuard; -use tokio::task::JoinHandle; -use tokio::time::interval; use uuid::Uuid; -/// A token used to force a collection. -/// -/// If the collection is successfully completed, `Ok(())` will be sent back on the -/// contained oneshot channel. Note that that "successful" means the actual -/// request completed, _not_ that results were successfully collected. I.e., it -/// means "this attempt is done". -/// -/// If the collection could not be queued because there are too many outstanding -/// force collection attempts, an `Err(ForcedCollectionQueueFull)` is returned. -type CollectionToken = oneshot::Sender>; - -/// Error returned when a forced collection fails. 
-#[derive(Clone, Copy, Debug)] -pub enum ForcedCollectionError { - /// The internal queue of requests is full. - QueueFull, - /// We failed to send the request because the channel was closed. - Closed, -} - -/// Timeout on any single collection from a producer. -const COLLECTION_TIMEOUT: Duration = Duration::from_secs(30); - -/// The number of forced collections queued before we start to deny them. -const N_QUEUED_FORCED_COLLECTIONS: usize = 1; - -/// The number of timer-based collections queued before we start to deny them. -const N_QUEUED_TIMER_COLLECTIONS: usize = 1; - -// Messages for controlling a collection task -#[derive(Debug)] -enum CollectionMessage { - // Explicit request that the task collect data from its producer - // - // Also sends a oneshot that is signalled once the task scrapes - // data from the Producer, and places it in the Clickhouse server. - Collect(CollectionToken), - // Request that the task update its interval and the socket address on which it collects data - // from its producer. - Update(ProducerEndpoint), - // Request that the task exit - Shutdown, - // Return the current statistics from a single task. - #[cfg(test)] - Statistics { - reply_tx: oneshot::Sender, - }, -} - -/// Run a single collection from the producer. -async fn perform_collection( - log: Logger, - client: reqwest::Client, - producer: ProducerEndpoint, -) -> Result { - debug!(log, "collecting from producer"); - let res = client - .get(format!("http://{}/{}", producer.address, producer.id)) - .send() - .await; - trace!(log, "sent collection request to producer"); - match res { - Ok(res) => { - if res.status().is_success() { - match res.json::().await { - Ok(results) => { - debug!( - log, - "collected results from producer"; - "n_results" => results.len() - ); - Ok(results) - } - Err(e) => { - warn!( - log, - "failed to collect results from producer"; - InlineErrorChain::new(&e), - ); - Err(self_stats::FailureReason::Deserialization) - } - } - } else { - warn!( - log, - "failed to receive metric results from producer"; - "status_code" => res.status().as_u16(), - ); - Err(self_stats::FailureReason::Other(res.status())) - } - } - Err(e) => { - error!( - log, - "failed to send collection request to producer"; - InlineErrorChain::new(&e), - ); - Err(self_stats::FailureReason::Unreachable) - } - } -} - -// The type of one collection task run to completion. -// -// An `Err(_)` means we failed to collect, and contains the reason so that we -// can bump the self-stat counter accordingly. -type CollectionResult = Result; - -// The type of one response message sent from the collection task. -type CollectionResponse = (Option, CollectionResult); - -/// Task that actually performs collections from the producer. -async fn inner_collection_loop( - log: Logger, - mut producer_info_rx: watch::Receiver, - mut forced_collection_rx: mpsc::Receiver, - mut timer_collection_rx: mpsc::Receiver<()>, - result_tx: mpsc::Sender, -) { - let client = reqwest::Client::builder() - .timeout(COLLECTION_TIMEOUT) - .build() - // Safety: `build()` only fails if TLS couldn't be initialized or the - // system DNS configuration could not be loaded. - .unwrap(); - loop { - // Wait for notification that we have a collection to perform, from - // either the forced- or timer-collection queue. - trace!(log, "top of inner collection loop, waiting for next request",); - let maybe_token = tokio::select! 
{ - maybe_request = forced_collection_rx.recv() => { - let Some(request) = maybe_request else { - debug!( - log, - "forced collection request queue closed, exiting" - ); - return; - }; - Some(request) - } - maybe_request = timer_collection_rx.recv() => { - if maybe_request.is_none() { - debug!( - log, - "timer collection request queue closed, exiting" - ); - return; - }; - None - } - }; - - // Make a future to represent the actual collection. - let mut collection_fut = Box::pin(perform_collection( - log.clone(), - client.clone(), - *producer_info_rx.borrow_and_update(), - )); - - // Wait for that collection to complete or fail, or for an update to the - // producer's information. In the latter case, recreate the future for - // the collection itself with the new producer information. - let collection_result = 'collection: loop { - tokio::select! { - biased; - - maybe_update = producer_info_rx.changed() => { - match maybe_update { - Ok(_) => { - let update = *producer_info_rx.borrow_and_update(); - debug!( - log, - "received producer info update with an outstanding \ - collection running, cancelling it and recreating \ - with the new info"; - "new_info" => ?&update, - ); - collection_fut = Box::pin(perform_collection( - log.new(o!("address" => update.address)), - client.clone(), - update, - )); - continue 'collection; - } - Err(e) => { - error!( - log, - "failed to receive on producer update \ - watch channel, exiting"; - InlineErrorChain::new(&e), - ); - return; - } - } - } - - collection_result = &mut collection_fut => { - // NOTE: This break here is intentional. We cannot just call - // `result_tx.send()` in this loop, because that moves out - // of `maybe_token`, which isn't Copy. Break the loop, and - // then send it after we know we've completed the - // collection. - break 'collection collection_result; - } - } - }; - - // Now that the collection has completed, send on the results, along - // with any collection token we may have gotten with the request. - match result_tx.send((maybe_token, collection_result)).await { - Ok(_) => trace!(log, "forwarded results to main collection loop"), - Err(_) => { - error!( - log, - "failed to forward results to \ - collection loop, channel is closed, exiting", - ); - return; - } - } - } -} - -// Background task used to collect metrics from one producer on an interval. -// -// This function is started by the `OximeterAgent`, when a producer is registered. The task loops -// endlessly, and collects metrics from the assigned producer on a timeout. The assigned agent can -// also send a `CollectionMessage`, for example to update the collection interval. This is not -// currently used, but will likely be exposed via control plane interfaces in the future. -async fn collection_loop( - log: Logger, - collector: self_stats::OximeterCollector, - producer: ProducerEndpoint, - mut inbox: mpsc::Receiver, - outbox: mpsc::Sender<(Option, ProducerResults)>, -) { - let mut collection_timer = interval(producer.interval); - debug!( - log, - "starting oximeter collection task"; - "interval" => ?producer.interval, - ); - - // Set up the collection of self statistics about this collection task. - let mut stats = self_stats::CollectionTaskStats::new(collector, &producer); - let mut self_collection_timer = interval(self_stats::COLLECTION_INTERVAL); - self_collection_timer.tick().await; - - // Spawn a task to run the actual collections. 
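Both the `inner_collection_loop` being removed here and its replacement in the new `oximeter/collector/src/collection_task.rs` later in this diff rely on the same cancel-and-restart idiom: a `biased` `tokio::select!` races the in-flight collection future against `watch::Receiver::changed()` and rebuilds the future whenever the producer's information changes. A minimal, self-contained sketch of that idiom follows; the names (`scrape`, `scrape_with_restart`) are illustrative and not taken from the diff.

use std::net::SocketAddr;
use std::time::Duration;
use tokio::sync::watch;

// Stand-in for `perform_collection`: scrape whatever producer address is
// currently configured.
async fn scrape(addr: SocketAddr) -> Result<String, String> {
    tokio::time::sleep(Duration::from_millis(100)).await;
    Ok(format!("scraped {addr}"))
}

// Run one scrape, restarting it from scratch if the producer address changes
// while the request is still in flight.
async fn scrape_with_restart(
    mut info_rx: watch::Receiver<SocketAddr>,
) -> Result<String, String> {
    let mut fut = Box::pin(scrape(*info_rx.borrow_and_update()));
    loop {
        tokio::select! {
            // Check for configuration changes first, so a stale in-flight
            // request is dropped promptly.
            biased;
            changed = info_rx.changed() => match changed {
                Ok(()) => {
                    // Drop the old future and start again with the new address.
                    fut = Box::pin(scrape(*info_rx.borrow_and_update()));
                }
                Err(_) => return Err(String::from("producer info channel closed")),
            },
            result = &mut fut => return result,
        }
    }
}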
- // - // This is so that we can possibly interrupt and restart collections that - // are in-progress when we get an update to the producer's information. In - // that case, the collection is likely doomed, since the producer has moved - // and won't be available at the address the collection started with. This - // lets us restart that collection with the new information. - let (producer_info_tx, producer_info_rx) = watch::channel(producer); - let (forced_collection_tx, forced_collection_rx) = - mpsc::channel(N_QUEUED_FORCED_COLLECTIONS); - let (timer_collection_tx, timer_collection_rx) = - mpsc::channel(N_QUEUED_TIMER_COLLECTIONS); - let (result_tx, mut result_rx) = mpsc::channel(1); - tokio::task::spawn(inner_collection_loop( - log.clone(), - producer_info_rx, - forced_collection_rx, - timer_collection_rx, - result_tx, - )); - - loop { - tokio::select! { - message = inbox.recv() => { - match message { - None => { - debug!( - log, - "collection task inbox closed, shutting down" - ); - return; - } - Some(CollectionMessage::Shutdown) => { - debug!( - log, - "collection task received shutdown request" - ); - return; - }, - Some(CollectionMessage::Collect(token)) => { - debug!( - log, - "collection task received explicit request to collect" - ); - match forced_collection_tx.try_send(token) { - Ok(_) => trace!( - log, "forwarded explicit request to collection task" - ), - Err(e) => { - match e { - TrySendError::Closed(tok) => { - debug!( - log, - "collection task forced collection \ - queue is closed. Attempting to \ - notify caller and exiting.", - ); - let _ = tok.send(Err(ForcedCollectionError::Closed)); - return; - } - TrySendError::Full(tok) => { - error!( - log, - "collection task forced collection \ - queue is full! This should never \ - happen, and probably indicates \ - a bug in your test code, such as \ - calling `force_collection()` many \ - times" - ); - if tok - .send(Err(ForcedCollectionError::QueueFull)) - .is_err() - { - warn!( - log, - "failed to notify caller of \ - force_collection(), oneshot is \ - closed" - ); - } - } - } - } - } - }, - Some(CollectionMessage::Update(new_info)) => { - // If the collection interval is shorter than the - // interval on which we receive these update messages, - // we'll never actually collect anything! Instead, only - // do the update if the information has changed. This - // should also be guarded against by the main agent, but - // we're being cautious here. - let updated_producer_info = |info: &mut ProducerEndpoint| { - if new_info == *info { - false - } else { - *info = new_info; - true - } - }; - if !producer_info_tx.send_if_modified(updated_producer_info) { - trace!( - log, - "collection task received update with \ - identical producer information, no \ - updates will be sent to the collection task" - ); - continue; - } - - // We have an actual update to the producer information. - // - // Rebuild our timer to reflect the possibly-new - // interval. The collection task has already been - // notified above. - debug!( - log, - "collection task received request to update \ - its producer information"; - "interval" => ?new_info.interval, - "address" => new_info.address, - ); - collection_timer = interval(new_info.interval); - collection_timer.tick().await; // completes immediately - } - #[cfg(test)] - Some(CollectionMessage::Statistics { reply_tx }) => { - // Time should be paused when using this retrieval - // mechanism. We advance time to cause a panic if this - // message were to be sent with time *not* paused. 
- tokio::time::advance(Duration::from_nanos(1)).await; - // The collection timer *may* be ready to go in which - // case we would do a collection right after - // processesing this message, thus changing the actual - // data. Instead we reset the timer to prevent - // additional collections (i.e. since time is paused). - collection_timer.reset(); - debug!( - log, - "received request for current task statistics" - ); - reply_tx.send(stats.clone()).expect("failed to send statistics"); - } - } - } - maybe_result = result_rx.recv() => { - let Some((maybe_token, result)) = maybe_result else { - error!( - log, - "channel for receiving results from collection task \ - is closed, exiting", - ); - return; - }; - match result { - Ok(results) => { - stats.collections.datum.increment(); - if outbox.send((maybe_token, results)).await.is_err() { - error!( - log, - "failed to send results to outbox, channel is \ - closed, exiting", - ); - return; - } - } - Err(reason) => stats.failures_for_reason(reason).datum.increment(), - } - } - _ = self_collection_timer.tick() => { - debug!( - log, - "reporting oximeter self-collection statistics" - ); - outbox.send((None, stats.sample())).await.unwrap(); - } - _ = collection_timer.tick() => { - match timer_collection_tx.try_send(()) { - Ok(_) => { - debug!( - log, - "sent timer-based collection request to \ - the collection task" - ); - } - Err(TrySendError::Closed(_)) => { - error!( - log, - "timer-based collection request queue is \ - closed, exiting" - ); - return; - } - Err(TrySendError::Full(_)) => { - error!( - log, - "timer-based collection request queue is \ - full! This may indicate that the producer \ - has a sampling interval that is too fast \ - for the amount of data it generates"; - "interval" => ?producer_info_tx.borrow().interval, - ); - stats - .failures_for_reason( - self_stats::FailureReason::CollectionsInProgress - ) - .datum - .increment() - } - } - } - } - } -} - -// Struct representing a task for collecting metric data from a single producer -#[derive(Debug)] -struct CollectionTask { - // Channel used to send messages from the agent to the actual task. The task owns the other - // side. - pub inbox: mpsc::Sender, - // Handle to the actual tokio task running the collection loop. - #[allow(dead_code)] - pub task: JoinHandle<()>, -} - -// A task run by `oximeter` in standalone mode, which simply prints results as -// they're received. -async fn results_printer( - log: Logger, - mut rx: mpsc::Receiver<(Option, ProducerResults)>, -) { - loop { - match rx.recv().await { - Some((_, results)) => { - for res in results.into_iter() { - match res { - ProducerResultsItem::Ok(samples) => { - for sample in samples.into_iter() { - info!( - log, - ""; - "sample" => ?sample, - ); - } - } - ProducerResultsItem::Err(e) => { - error!( - log, - "received error from a producer"; - InlineErrorChain::new(&e), - ); - } - } - } - } - None => { - debug!(log, "result queue closed, exiting"); - return; - } - } - } -} - -// Aggregation point for all results, from all collection tasks. -async fn results_sink( - log: Logger, - client: Client, - batch_size: usize, - batch_interval: Duration, - mut rx: mpsc::Receiver<(Option, ProducerResults)>, -) { - let mut timer = interval(batch_interval); - timer.tick().await; // completes immediately - let mut batch = Vec::with_capacity(batch_size); - loop { - let mut collection_token = None; - let insert = tokio::select! 
{ - _ = timer.tick() => { - if batch.is_empty() { - trace!(log, "batch interval expired, but no samples to insert"); - false - } else { - true - } - } - results = rx.recv() => { - match results { - Some((token, results)) => { - let flattened_results = { - let mut flattened = Vec::with_capacity(results.len()); - for inner_batch in results.into_iter() { - match inner_batch { - ProducerResultsItem::Ok(samples) => flattened.extend(samples.into_iter()), - ProducerResultsItem::Err(e) => { - debug!( - log, - "received error (not samples) from a producer: {}", - e.to_string() - ); - } - } - } - flattened - }; - batch.extend(flattened_results); - - collection_token = token; - if collection_token.is_some() { - true - } else { - batch.len() >= batch_size - } - } - None => { - warn!(log, "result queue closed, exiting"); - return; - } - } - } - }; - - if insert { - debug!(log, "inserting {} samples into database", batch.len()); - match client.insert_samples(&batch).await { - Ok(()) => trace!(log, "successfully inserted samples"), - Err(e) => { - warn!( - log, - "failed to insert some results into metric DB: {}", - e.to_string() - ); - } - } - // TODO-correctness The `insert_samples` call above may fail. The method itself needs - // better handling of partially-inserted results in that case, but we may need to retry - // or otherwise handle an error here as well. - // - // See https://github.com/oxidecomputer/omicron/issues/740 for a - // disucssion. - batch.clear(); - } - - if let Some(token) = collection_token { - let _ = token.send(Ok(())); - } - } -} - /// The internal agent the oximeter server uses to collect metrics from producers. #[derive(Clone, Debug)] pub struct OximeterAgent { @@ -657,10 +57,9 @@ pub struct OximeterAgent { // Oximeter target used by this agent to produce metrics about itself. collection_target: self_stats::OximeterCollector, // Handle to the TX-side of a channel for collecting results from the collection tasks - result_sender: mpsc::Sender<(Option, ProducerResults)>, - // The actual tokio tasks running the collection on a timer. - collection_tasks: - Arc>>, + result_sender: mpsc::Sender, + // Handle to each Tokio task collection from a single producer. + collection_tasks: Arc>>, // The interval on which we refresh our list of producers from Nexus refresh_interval: Duration, // Handle to the task used to periodically refresh the list of producers. @@ -729,7 +128,7 @@ impl OximeterAgent { // Spawn the task for aggregating and inserting all metrics tokio::spawn(async move { - results_sink( + crate::results_sink::database_inserter( insertion_log, client, db_config.batch_size, @@ -814,7 +213,7 @@ impl OximeterAgent { // Spawn the task for aggregating and inserting all metrics tokio::spawn(async move { - results_sink( + results_sink::database_inserter( insertion_log, client, db_config.batch_size, @@ -824,7 +223,7 @@ impl OximeterAgent { .await }); } else { - tokio::spawn(results_printer(insertion_log, result_receiver)); + tokio::spawn(results_sink::logger(insertion_log, result_receiver)); } // Set up tracking of statistics about ourselves. @@ -851,6 +250,18 @@ impl OximeterAgent { }) } + /// Fetch details about a producer, if it exists. + pub async fn producer_details( + &self, + id: Uuid, + ) -> Result { + let tasks = self.collection_tasks.lock().await; + let Some(task) = tasks.get(&id) else { + return Err(Error::NoSuchProducer { id }); + }; + task.details().await + } + /// Register a new producer with this oximeter instance. 
pub async fn register_producer( &self, @@ -865,10 +276,7 @@ impl OximeterAgent { // the map is held. async fn register_producer_locked( &self, - tasks: &mut MutexGuard< - '_, - BTreeMap, - >, + tasks: &mut MutexGuard<'_, BTreeMap>, info: ProducerEndpoint, ) { let id = info.id; @@ -880,26 +288,20 @@ impl OximeterAgent { "producer_id" => id.to_string(), "address" => info.address, ); - - // Build channel to control the task and receive results. - let (tx, rx) = mpsc::channel(4); - let q = self.result_sender.clone(); - let log = self.log.new(o!( - "component" => "collection-task", - "producer_id" => id.to_string(), - )); - let info_clone = info; - let target = self.collection_target; - let task = tokio::spawn(async move { - collection_loop(log, target, info_clone, rx, q).await; - }); - value.insert((info, CollectionTask { inbox: tx, task })); + let handle = CollectionTaskHandle::new( + &self.log, + self.collection_target, + info, + self.result_sender.clone(), + ) + .await; + value.insert(handle); } Entry::Occupied(mut value) => { // Only update the endpoint information if it's actually // different, to avoid indefinitely delaying the collection // timer from expiring. - if value.get().0 == info { + if value.get().producer == info { trace!( self.log, "ignoring request to update existing metric \ @@ -916,14 +318,7 @@ impl OximeterAgent { "interval" => ?info.interval, "address" => info.address, ); - value.get_mut().0 = info; - value - .get() - .1 - .inbox - .send(CollectionMessage::Update(info)) - .await - .unwrap(); + value.get_mut().update(info).await; } } } @@ -943,10 +338,9 @@ impl OximeterAgent { ) -> Result<(), ForcedCollectionError> { let mut collection_oneshots = vec![]; let collection_tasks = self.collection_tasks.lock().await; - for (_id, (_endpoint, task)) in collection_tasks.iter() { - let (tx, rx) = oneshot::channel(); + for (_id, task) in collection_tasks.iter() { // Scrape from each producer, into oximeter... - task.inbox.send(CollectionMessage::Collect(tx)).await.unwrap(); + let rx = task.collect(); // ... and keep track of the token that indicates once the metric // has made it into ClickHouse. collection_oneshots.push(rx); @@ -990,7 +384,7 @@ impl OximeterAgent { .await .range((start, Bound::Unbounded)) .take(limit) - .map(|(_id, (info, _t))| *info) + .map(|(_id, task)| task.producer) .collect() } @@ -1004,13 +398,10 @@ impl OximeterAgent { // the map is held. async fn delete_producer_locked( &self, - tasks: &mut MutexGuard< - '_, - BTreeMap, - >, + tasks: &mut MutexGuard<'_, BTreeMap>, id: Uuid, ) -> Result<(), Error> { - let Some((_info, task)) = tasks.remove(&id) else { + let Some(task) = tasks.remove(&id) else { // We have no such producer, so good news, we've removed it! 
return Ok(()); }; @@ -1019,19 +410,7 @@ impl OximeterAgent { "removed collection task from set"; "producer_id" => %id, ); - match task.inbox.send(CollectionMessage::Shutdown).await { - Ok(_) => debug!( - self.log, - "shut down collection task"; - "producer_id" => %id, - ), - Err(e) => error!( - self.log, - "failed to shut down collection task"; - "producer_id" => %id, - InlineErrorChain::new(&e), - ), - } + task.shutdown().await; Ok(()) } @@ -1089,6 +468,7 @@ async fn refresh_producer_list_task( let mut interval = tokio::time::interval(agent.refresh_interval); interval.set_missed_tick_behavior(tokio::time::MissedTickBehavior::Skip); + info!(agent.log, "starting refresh list task"); loop { interval.tick().await; info!(agent.log, "refreshing list of producers from Nexus"); @@ -1204,10 +584,10 @@ async fn claim_nexus_with_backoff( #[cfg(test)] mod tests { - use super::CollectionMessage; use super::OximeterAgent; use super::ProducerEndpoint; use crate::self_stats::FailureReason; + use chrono::Utc; use dropshot::HttpError; use dropshot::HttpResponseOk; use dropshot::Path; @@ -1223,7 +603,6 @@ mod tests { use std::sync::atomic::Ordering; use std::sync::Arc; use std::time::Duration; - use tokio::sync::oneshot; use tokio::time::Instant; use uuid::Uuid; @@ -1354,21 +733,15 @@ mod tests { } // Request the statistics from the task itself. - let (reply_tx, rx) = oneshot::channel(); - collector + let stats = collector .collection_tasks .lock() .await .values() .next() .unwrap() - .1 - .inbox - .send(CollectionMessage::Statistics { reply_tx }) - .await - .expect("failed to request statistics from task"); - let stats = rx.await.expect("failed to receive statistics from task"); - + .statistics() + .await; let count = stats.collections.datum.value() as usize; assert!(count != 0); @@ -1425,20 +798,15 @@ mod tests { } // Request the statistics from the task itself. - let (reply_tx, rx) = oneshot::channel(); - collector + let stats = collector .collection_tasks .lock() .await .values() .next() .unwrap() - .1 - .inbox - .send(CollectionMessage::Statistics { reply_tx }) - .await - .expect("failed to request statistics from task"); - let stats = rx.await.expect("failed to receive statistics from task"); + .statistics() + .await; assert_eq!(stats.collections.datum.value(), 0); assert_eq!( stats @@ -1504,20 +872,15 @@ mod tests { } // Request the statistics from the task itself. - let (reply_tx, rx) = oneshot::channel(); - collector + let stats = collector .collection_tasks .lock() .await .values() .next() .unwrap() - .1 - .inbox - .send(CollectionMessage::Statistics { reply_tx }) - .await - .expect("failed to request statistics from task"); - let stats = rx.await.expect("failed to receive statistics from task"); + .statistics() + .await; let count = stats .failed_collections .get(&FailureReason::Other( @@ -1562,4 +925,184 @@ mod tests { ); logctx.cleanup_successful(); } + + #[tokio::test] + async fn verify_producer_details() { + let logctx = test_setup_log("verify_producer_details"); + let log = &logctx.log; + + // Spawn an oximeter collector ... + let collector = OximeterAgent::new_standalone( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + crate::default_refresh_interval(), + None, + log, + ) + .await + .unwrap(); + + // Spawn the mock server that always reports empty statistics. 
+ let collection_count = Arc::new(AtomicUsize::new(0)); + let server = ServerBuilder::new( + producer_api_mod::api_description::().unwrap(), + collection_count.clone(), + log.new(slog::o!("component" => "dropshot")), + ) + .config(Default::default()) + .start() + .expect("failed to spawn empty dropshot server"); + + // Register the dummy producer. + let endpoint = ProducerEndpoint { + id: Uuid::new_v4(), + kind: ProducerKind::Service, + address: server.local_addr(), + interval: COLLECTION_INTERVAL, + }; + let id = endpoint.id; + let before = Utc::now(); + collector + .register_producer(endpoint) + .await + .expect("failed to register dummy producer"); + + // We don't manipulate time manually here, since this is pretty short + // and we want to assert things about the actual timing in the test + // below. + let is_ready = || async { + // We need to check if the server has had a collection request, and + // also if we've processed it on our task side. If we don't wait for + // the second bit, updating our collection details in the task races + // with the rest of this test that checks those details. + if collection_count.load(Ordering::SeqCst) < 1 { + return false; + } + collector + .producer_details(id) + .await + .expect("Should be able to get producer details") + .n_collections + > 0 + }; + while !is_ready().await { + tokio::time::sleep(TICK_INTERVAL).await; + } + + // Get details about the producer. + let count = collection_count.load(Ordering::SeqCst) as u64; + let details = collector + .producer_details(id) + .await + .expect("Should be able to get producer details"); + println!("{details:#?}"); + assert_eq!(details.id, id); + assert!(details.registered > before); + assert!(details.updated > before); + assert_eq!(details.registered, details.updated); + assert!( + details.n_collections == count + || details.n_collections == count - 1 + ); + assert_eq!(details.n_failures, 0); + let success = + details.last_success.expect("Should have a successful collection"); + assert!(success.time_queued > Duration::ZERO); + assert!(success.time_collecting > Duration::ZERO); + assert!(success.n_samples == 0); + assert!(details.last_failure.is_none()); + logctx.cleanup_successful(); + } + + #[tokio::test] + async fn test_updated_producer_is_still_collected_from() { + let logctx = + test_setup_log("test_updated_producer_is_still_collected_from"); + let log = &logctx.log; + + // Spawn an oximeter collector ... + let collector = OximeterAgent::new_standalone( + Uuid::new_v4(), + SocketAddrV6::new(Ipv6Addr::LOCALHOST, 0, 0, 0), + crate::default_refresh_interval(), + None, + log, + ) + .await + .unwrap(); + + // Spawn the mock server that always reports empty statistics. + let collection_count = Arc::new(AtomicUsize::new(0)); + let server = ServerBuilder::new( + producer_api_mod::api_description::().unwrap(), + collection_count.clone(), + log.new(slog::o!("component" => "dropshot")), + ) + .config(Default::default()) + .start() + .expect("failed to spawn empty dropshot server"); + + // Register the dummy producer. + let id = Uuid::new_v4(); + let endpoint = ProducerEndpoint { + id, + kind: ProducerKind::Service, + address: server.local_addr(), + interval: COLLECTION_INTERVAL, + }; + collector + .register_producer(endpoint) + .await + .expect("failed to register dummy producer"); + + let details = collector.producer_details(id).await.unwrap(); + println!("{details:#?}"); + + // Ensure we get some collections from it. 
+ tokio::time::pause(); + while collection_count.load(Ordering::SeqCst) < 1 { + tokio::time::advance(TICK_INTERVAL).await; + } + + // Now, drop and recreate the server, and register with the same ID at a + // different address. + let collection_count = Arc::new(AtomicUsize::new(0)); + let server = ServerBuilder::new( + producer_api_mod::api_description::().unwrap(), + collection_count.clone(), + log.new(slog::o!("component" => "dropshot")), + ) + .config(Default::default()) + .start() + .expect("failed to spawn empty dropshot server"); + + // Register the dummy producer. + let endpoint = + ProducerEndpoint { address: server.local_addr(), ..endpoint }; + collector + .register_producer(endpoint) + .await + .expect("failed to register dummy producer a second time"); + + // We should just have one producer. + assert_eq!( + collector.collection_tasks.lock().await.len(), + 1, + "Should only have one producer, it was updated and has the \ + same UUID", + ); + + // We should eventually collect from it again. + let now = Instant::now(); + while now.elapsed() < TEST_WAIT_PERIOD { + tokio::time::advance(TICK_INTERVAL).await; + } + let details = collector.producer_details(id).await.unwrap(); + println!("{details:#?}"); + assert_eq!(details.id, id); + assert_eq!(details.address, server.local_addr()); + assert!(details.n_collections > 0); + assert!(collection_count.load(Ordering::SeqCst) > 0); + logctx.cleanup_successful(); + } } diff --git a/oximeter/collector/src/collection_task.rs b/oximeter/collector/src/collection_task.rs new file mode 100644 index 0000000000..716f87421f --- /dev/null +++ b/oximeter/collector/src/collection_task.rs @@ -0,0 +1,898 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Task responsible for collecting from a single producer. + +// Copyright 2024 Oxide Computer Company + +use crate::self_stats; +use crate::Error; +use chrono::DateTime; +use chrono::Utc; +use omicron_common::api::internal::nexus::ProducerEndpoint; +use oximeter::types::ProducerResults; +use oximeter::types::ProducerResultsItem; +use oximeter_api::FailedCollection; +use oximeter_api::ProducerDetails; +use oximeter_api::SuccessfulCollection; +use slog::debug; +use slog::error; +use slog::o; +use slog::trace; +use slog::warn; +use slog::Logger; +use slog_error_chain::InlineErrorChain; +use std::time::Duration; +use tokio::sync::mpsc; +use tokio::sync::mpsc::error::TrySendError; +use tokio::sync::oneshot; +use tokio::sync::watch; +use tokio::time::interval; +use tokio::time::Instant; +use tokio::time::Interval; + +/// A token used to force a collection. +/// +/// If the collection is successfully completed, `Ok(())` will be sent back on the +/// contained oneshot channel. Note that that "successful" means the actual +/// request completed, _not_ that results were successfully collected. I.e., it +/// means "this attempt is done". +/// +/// If the collection could not be queued because there are too many outstanding +/// force collection attempts, an `Err(ForcedCollectionQueueFull)` is returned. +type CollectionToken = oneshot::Sender>; + +/// Error returned when a forced collection fails. +#[derive(Clone, Copy, Debug)] +pub enum ForcedCollectionError { + /// The internal queue of requests is full. + QueueFull, + /// We failed to send the request because the channel was closed. + Closed, +} + +/// Timeout on any single collection from a producer. 
+const COLLECTION_TIMEOUT: Duration = Duration::from_secs(30); + +/// The number of forced collections queued before we start to deny them. +const N_QUEUED_FORCED_COLLECTIONS: usize = 1; + +/// The number of timer-based collections queued before we start to deny them. +const N_QUEUED_TIMER_COLLECTIONS: usize = 1; + +/// The number of queued messages from the main collector agent. +const N_QUEUED_TASK_MESSAGES: usize = 4; + +/// The number of queued results from our internal collection task. +const N_QUEUED_RESULTS: usize = 1; + +// Messages for controlling a collection task +#[derive(Debug)] +enum CollectionMessage { + // Explicit request that the task collect data from its producer + // + // Also sends a oneshot that is signalled once the task scrapes + // data from the Producer, and places it in the Clickhouse server. + Collect(CollectionToken), + // Request that the task update its interval and the socket address on which it collects data + // from its producer. + Update(ProducerEndpoint), + // Request that the task exit + Shutdown, + // Return the current statistics from a single task. + #[cfg(test)] + Statistics { + reply_tx: oneshot::Sender, + }, + // Request details from the collection task about its producer. + Details { + reply_tx: oneshot::Sender, + }, +} + +/// Return type for `perform_collection`. +struct SingleCollectionResult { + /// The result of the collection. + result: Result, + /// The duration the collection took. + duration: Duration, +} + +/// Run a single collection from the producer. +async fn perform_collection( + log: Logger, + client: reqwest::Client, + producer: ProducerEndpoint, +) -> SingleCollectionResult { + let start = Instant::now(); + debug!(log, "collecting from producer"); + let res = client + .get(format!("http://{}/{}", producer.address, producer.id)) + .send() + .await; + trace!(log, "sent collection request to producer"); + let result = match res { + Ok(res) => { + if res.status().is_success() { + match res.json::().await { + Ok(results) => { + debug!( + log, + "collected results from producer"; + "n_results" => results.len() + ); + Ok(results) + } + Err(e) => { + warn!( + log, + "failed to collect results from producer"; + InlineErrorChain::new(&e), + ); + Err(self_stats::FailureReason::Deserialization) + } + } + } else { + warn!( + log, + "failed to receive metric results from producer"; + "status_code" => res.status().as_u16(), + ); + Err(self_stats::FailureReason::Other(res.status())) + } + } + Err(e) => { + error!( + log, + "failed to send collection request to producer"; + InlineErrorChain::new(&e), + ); + Err(self_stats::FailureReason::Unreachable) + } + }; + SingleCollectionResult { result, duration: start.elapsed() } +} + +// The type of one collection task run to completion. +// +// An `Err(_)` means we failed to collect, and contains the reason so that we +// can bump the self-stat counter accordingly. +type CollectionResult = Result; + +/// Information about when we start a collection. +struct CollectionStartTimes { + /// UTC timestamp at which the request was started. + started_at: DateTime, + /// Instant right before we queued the response for processing. + queued_at: Instant, +} + +impl CollectionStartTimes { + fn new() -> Self { + Self { started_at: Utc::now(), queued_at: Instant::now() } + } +} + +/// Details about a forced collection. +struct ForcedCollectionRequest { + /// The collection token we signal when the collection is completed. + token: CollectionToken, + /// Start time for this collection. 
+ start: CollectionStartTimes, +} + +impl ForcedCollectionRequest { + fn new(token: CollectionToken) -> Self { + Self { token, start: CollectionStartTimes::new() } + } +} + +/// Details about a completed collection. +struct CollectionResponse { + /// Token for a forced collection request. + token: Option, + /// The actual result of the collection. + result: CollectionResult, + /// Time when the collection started. + started_at: DateTime, + /// Time the request spent queued. + time_queued: Duration, + /// Time we spent processing the request. + time_collecting: Duration, +} + +/// Task that actually performs collections from the producer. +async fn collection_loop( + log: Logger, + mut producer_info_rx: watch::Receiver, + mut forced_collection_rx: mpsc::Receiver, + mut timer_collection_rx: mpsc::Receiver, + result_tx: mpsc::Sender, +) { + let client = reqwest::Client::builder() + .timeout(COLLECTION_TIMEOUT) + .build() + // Safety: `build()` only fails if TLS couldn't be initialized or the + // system DNS configuration could not be loaded. + .unwrap(); + loop { + // Wait for notification that we have a collection to perform, from + // either the forced- or timer-collection queue. + trace!(log, "top of inner collection loop, waiting for next request"); + let (maybe_token, start_time) = tokio::select! { + maybe_request = forced_collection_rx.recv() => { + let Some(ForcedCollectionRequest { token, start }) = maybe_request else { + debug!( + log, + "forced collection request queue closed, exiting" + ); + return; + }; + (Some(token), start) + } + maybe_request = timer_collection_rx.recv() => { + let Some(start) = maybe_request else { + debug!( + log, + "timer collection request queue closed, exiting" + ); + return; + }; + (None, start) + } + }; + + // Record the time this request was queued. We'll include this along + // with the time spent collecting, which is returned from the future + // that actually does the collection. + let CollectionStartTimes { started_at, queued_at } = start_time; + let time_queued = queued_at.elapsed(); + + // Make a future to represent the actual collection. + let mut collection_fut = Box::pin(perform_collection( + log.clone(), + client.clone(), + *producer_info_rx.borrow_and_update(), + )); + + // Wait for that collection to complete or fail, or for an update to the + // producer's information. In the latter case, recreate the future for + // the collection itself with the new producer information. + let SingleCollectionResult { result, duration } = 'collection: loop { + tokio::select! { + biased; + + maybe_update = producer_info_rx.changed() => { + match maybe_update { + Ok(_) => { + let update = *producer_info_rx.borrow_and_update(); + debug!( + log, + "received producer info update with an outstanding \ + collection running, cancelling it and recreating \ + with the new info"; + "new_info" => ?&update, + ); + collection_fut = Box::pin(perform_collection( + log.new(o!("address" => update.address)), + client.clone(), + update, + )); + continue 'collection; + } + Err(e) => { + error!( + log, + "failed to receive on producer update \ + watch channel, exiting"; + InlineErrorChain::new(&e), + ); + return; + } + } + } + + collection_result = &mut collection_fut => { + // NOTE: This break here is intentional. We cannot just call + // `result_tx.send()` in this loop, because that moves out + // of `maybe_token`, which isn't Copy. Break the loop, and + // then send it after we know we've completed the + // collection. 
+ break 'collection collection_result; + } + } + }; + + // Now that the collection has completed, send on the results, along + // with the timing information and any collection token we may have + // gotten with the request. + let response = CollectionResponse { + token: maybe_token, + result, + started_at, + time_queued, + time_collecting: duration, + }; + match result_tx.send(response).await { + Ok(_) => trace!(log, "forwarded results to main collection loop"), + Err(_) => { + error!( + log, + "failed to forward results to \ + collection loop, channel is closed, exiting", + ); + return; + } + } + } +} + +/// Type of each output sent from a collection task to the results sink. +pub type CollectionTaskOutput = (Option<CollectionToken>, ProducerResults); + +/// Handle to the task which collects metric data from a single producer. +#[derive(Debug)] +pub struct CollectionTaskHandle { + /// Information about the producer we're currently collecting from. + pub producer: ProducerEndpoint, + // Channel used to send messages from the agent to the actual task. + // + // The task owns the other side. + task_tx: mpsc::Sender<CollectionMessage>, + log: Logger, +} + +impl CollectionTaskHandle { + /// Create a new collection task handle. + /// + /// This spawns the actual task itself, and returns a handle to it. The + /// latter is used to send messages to the task, through the handle's + /// `task_tx` field. + pub async fn new( + log: &Logger, + collector: self_stats::OximeterCollector, + producer: ProducerEndpoint, + outbox: mpsc::Sender<CollectionTaskOutput>, + ) -> Self { + let (task, task_tx) = + CollectionTask::new(log, collector, producer, outbox).await; + tokio::spawn(task.run()); + let log = log.new(o!( + "component" => "collection-task-handle", + "producer_id" => producer.id.to_string(), + )); + Self { task_tx, producer, log } + } + + /// Ask the task to update its producer endpoint information. + /// + /// # Panics + /// + /// This panics if we could not send a message to the internal collection + /// task. That only happens when that task has exited. + pub async fn update(&mut self, info: ProducerEndpoint) { + match self.task_tx.send(CollectionMessage::Update(info)).await { + Ok(_) => { + trace!( + self.log, + "sent update message to task"; + "new_info" => ?info, + ); + self.producer = info; + } + Err(e) => { + error!( + self.log, + "failed to send update message to task!"; + "error" => InlineErrorChain::new(&e), + ); + panic!("failed to send update message to task: {}", e); + } + } + } + + /// Ask the collection task to shut down. + pub async fn shutdown(&self) { + match self.task_tx.send(CollectionMessage::Shutdown).await { + Ok(_) => trace!(self.log, "sent shutdown message to task"), + Err(e) => error!( + self.log, + "failed to send shutdown message to task!"; + "error" => InlineErrorChain::new(&e), + ), + } + } + + /// Return the current statistics from this task. + #[cfg(test)] + pub async fn statistics(&self) -> self_stats::CollectionTaskStats { + let (reply_tx, rx) = oneshot::channel(); + self.task_tx + .send(CollectionMessage::Statistics { reply_tx }) + .await + .expect("Failed to send statistics message"); + rx.await.expect("Failed to receive statistics") + } + + /// Return details about the current producer this task is collecting from. + /// + /// An error is returned if we either could not send the request to the + /// collection task because its queue is full, or because the task failed to + /// send us the response.
+ /// + /// Note that this makes collecting details best-effort -- if the task is + /// already doing lots of work and its queue is full, we fail rather than + /// block. + pub async fn details(&self) -> Result<ProducerDetails, Error> { + let (reply_tx, rx) = oneshot::channel(); + if self + .task_tx + .try_send(CollectionMessage::Details { reply_tx }) + .is_err() + { + return Err(Error::CollectionError( + self.producer.id, + String::from( + "Failed to send detail request to collection task", + ), + )); + } + rx.await.map_err(|_| { + Error::CollectionError( + self.producer.id, + String::from( + "Failed to receive detail response from collection task", + ), + ) + }) + } + + /// Explicitly request that the task collect from its producer now. + /// + /// Note that this doesn't block, instead returning a oneshot that will + /// resolve when the collection completes. + pub fn collect( + &self, + ) -> oneshot::Receiver<Result<(), ForcedCollectionError>> { + let (tx, rx) = oneshot::channel(); + match self.task_tx.try_send(CollectionMessage::Collect(tx)) { + Ok(_) => rx, + Err(err) => { + let (err, msg) = match err { + TrySendError::Full(msg) => { + (ForcedCollectionError::QueueFull, msg) + } + TrySendError::Closed(msg) => { + (ForcedCollectionError::Closed, msg) + } + }; + let CollectionMessage::Collect(tx) = msg else { + unreachable!(); + }; + // Safety: In this case, we own both sides of the channel and we + // know nothing has been sent on it. This can't fail. + tx.send(Err(err)).unwrap(); + rx + } + } + } +} + +/// Helper type used to simplify control flow in the main `CollectionTask::run` +/// method. +type TaskAction = std::ops::ControlFlow<()>; + +/// Main task used to dispatch messages from the oximeter agent and request +/// collections from the producer. +#[derive(Debug)] +struct CollectionTask { + log: Logger, + + // The details about past collections from this producer. + details: ProducerDetails, + + // Statistics about all collections we've made so far. + stats: self_stats::CollectionTaskStats, + + // Inbox for messages from the controlling task handle. + inbox: mpsc::Receiver<CollectionMessage>, + + // Watch channel for broadcasting changes about the producer. + producer_info_tx: watch::Sender<ProducerEndpoint>, + + // Channel for sending forced collection requests. + forced_collection_tx: mpsc::Sender<ForcedCollectionRequest>, + + // Channel for sending timer-based collection requests. + timer_collection_tx: mpsc::Sender<CollectionStartTimes>, + + // Channel for receiving collection responses from the inner collection + // loop. + result_rx: mpsc::Receiver<CollectionResponse>, + + // Outbox for forwarding the results to the sink. + outbox: mpsc::Sender<CollectionTaskOutput>, + + // Timer for making collections periodically. + collection_timer: Interval, + + // Timer for reporting our own collection statistics to the database. + self_collection_timer: Interval, +} + +impl CollectionTask { + // Construct a new collection task. + // + // This also spawns the internal task which itself manages the collections + // from our assigned producer. It then creates all the controlling queues + // for talking to this task and the inner task. + async fn new( + log: &Logger, + collector: self_stats::OximeterCollector, + producer: ProducerEndpoint, + outbox: mpsc::Sender<CollectionTaskOutput>, + ) -> (Self, mpsc::Sender<CollectionMessage>) { + // Create our own logger. + let log = log.new(o!( + "component" => "collection-task", + "producer_id" => producer.id.to_string(), + )); + + // Setup queues for talking between ourselves, our controlling task + // handle, and the spawned collection loop itself.
+ let (task_tx, inbox) = mpsc::channel(N_QUEUED_TASK_MESSAGES); + let (producer_info_tx, producer_info_rx) = watch::channel(producer); + let (forced_collection_tx, forced_collection_rx) = + mpsc::channel(N_QUEUED_FORCED_COLLECTIONS); + let (timer_collection_tx, timer_collection_rx) = + mpsc::channel(N_QUEUED_TIMER_COLLECTIONS); + let (result_tx, result_rx) = mpsc::channel(N_QUEUED_RESULTS); + tokio::task::spawn(collection_loop( + log.clone(), + producer_info_rx, + forced_collection_rx, + timer_collection_rx, + result_tx, + )); + + // Construct ourself, and return our controlling input queue. + let details = ProducerDetails::new(&producer); + let stats = self_stats::CollectionTaskStats::new(collector, &producer); + let collection_timer = Self::timer(producer.interval).await; + let self_collection_timer = + Self::timer(self_stats::COLLECTION_INTERVAL).await; + let self_ = Self { + log, + details, + stats, + inbox, + outbox, + producer_info_tx, + forced_collection_tx, + timer_collection_tx, + result_rx, + collection_timer, + self_collection_timer, + }; + (self_, task_tx) + } + + /// Helper to construct a timer and tick it. + /// + /// Since a `tokio::time::interval`'s first tick completes immediately, this + /// constructs the timer and then _ticks it_ once. + async fn timer(t: Duration) -> Interval { + let mut timer = interval(t); + timer.tick().await; + timer + } + + /// Run the main loop of this collection task. + /// + /// NOTE: This returns a `TaskAction`, but the value isn't used. It returns + /// that value to simplify control-flow internally, which uses `?` to + /// propagate the `TaskAction::Break` variant when we need to exit. + async fn run(mut self) -> TaskAction { + loop { + tokio::select! { + message = self.inbox.recv() => { + let Some(message) = message else { + debug!( + self.log, + "collection task inbox closed, shutting down" + ); + return TaskAction::Break(()); + }; + self.handle_inbox_message(message).await?; + } + maybe_result = self.result_rx.recv() => { + let Some(response) = maybe_result else { + error!( + self.log, + "channel for receiving results from collection task \ + is closed, exiting", + ); + return TaskAction::Break(()); + }; + self.handle_collection_response(response).await?; + } + _ = self.self_collection_timer.tick() => { + debug!( + self.log, + "reporting oximeter self-collection statistics" + ); + self.outbox.send((None, self.stats.sample())).await.unwrap(); + } + _ = self.collection_timer.tick() => { + self.handle_collection_timer_tick().await?; + } + } + } + } + + /// Handle a single message from the task handle. + /// + /// This method takes messages from the main oximeter agent, passed through + /// our controlling handle. This implements the main public API of the + /// `CollectionTaskHandle` methods that the agent uses. + async fn handle_inbox_message( + &mut self, + message: CollectionMessage, + ) -> TaskAction { + match message { + CollectionMessage::Shutdown => { + debug!(self.log, "collection task received shutdown request"); + return TaskAction::Break(()); + } + CollectionMessage::Collect(token) => { + debug!( + self.log, + "collection task received explicit request to collect" + ); + let request = ForcedCollectionRequest::new(token); + match self.forced_collection_tx.try_send(request) { + Ok(_) => { + trace!( + self.log, + "forwarded explicit request to collection task" + ); + } + Err(e) => match e { + TrySendError::Closed(ForcedCollectionRequest { + token, + .. 
+ }) => { + debug!( + self.log, + "collection task forced collection \ + queue is closed. Attempting to \ + notify caller and exiting.", + ); + let _ = + token.send(Err(ForcedCollectionError::Closed)); + return TaskAction::Break(()); + } + TrySendError::Full(ForcedCollectionRequest { + token, + start, + }) => { + error!( + self.log, + "collection task forced collection \ + queue is full! This should never \ + happen, and probably indicates \ + a bug in your test code, such as \ + calling `force_collection()` many \ + times" + ); + if token + .send(Err(ForcedCollectionError::QueueFull)) + .is_err() + { + warn!( + self.log, + "failed to notify caller of \ + force_collection(), oneshot is \ + closed" + ); + } + let failure = FailedCollection { + started_at: start.started_at, + time_queued: Duration::ZERO, + time_collecting: Duration::ZERO, + reason: String::from( + "forced collection queue full", + ), + }; + self.details.on_failure(failure); + } + }, + } + } + CollectionMessage::Update(new_info) => { + // If the collection interval is shorter than the + // interval on which we receive these update messages, + // we'll never actually collect anything! Instead, only + // do the update if the information has changed. This + // should also be guarded against by the main agent, but + // we're being cautious here. + let updated_producer_info = |info: &mut ProducerEndpoint| { + if new_info == *info { + false + } else { + *info = new_info; + true + } + }; + if !self + .producer_info_tx + .send_if_modified(updated_producer_info) + { + trace!( + self.log, + "collection task received update with \ + identical producer information, no \ + updates will be sent to the collection task" + ); + return TaskAction::Continue(()); + } + + // We have an actual update to the producer information. + // + // Rebuild our timer to reflect the possibly-new + // interval. The collection task has already been + // notified above. + debug!( + self.log, + "collection task received request to update \ + its producer information"; + "interval" => ?new_info.interval, + "address" => new_info.address, + ); + self.details.update(&new_info); + self.stats.update(&new_info); + self.collection_timer = Self::timer(new_info.interval).await; + } + #[cfg(test)] + CollectionMessage::Statistics { reply_tx } => { + // Time should be paused when using this retrieval + // mechanism. We advance time to cause a panic if this + // message were to be sent with time *not* paused. + tokio::time::advance(Duration::from_nanos(1)).await; + // The collection timer *may* be ready to go in which + // case we would do a collection right after + // processing this message, thus changing the actual + // data. Instead we reset the timer to prevent + // additional collections (i.e. since time is paused). + self.collection_timer.reset(); + debug!( + self.log, + "received request for current task statistics" + ); + reply_tx + .send(self.stats.clone()) + .expect("failed to send statistics"); + } + CollectionMessage::Details { reply_tx } => { + match reply_tx.send(self.details.clone()) { + Ok(_) => trace!( + self.log, + "sent producer details reply to oximeter agent", + ), + Err(e) => error!( + self.log, + "failed to send producer details reply to \ + oximeter agent"; + "error" => ?e, + ), + } + } + } + + // Continue unless we explicitly exit early. + TaskAction::Continue(()) + } + + /// Handle a single collection response from the inner collection task.
+ /// + /// This takes responses from the spawned task that actually does + /// collections, and dispatches them to the results sink. It also updates + /// our own details and collection stats accordingly. + async fn handle_collection_response( + &mut self, + response: CollectionResponse, + ) -> TaskAction { + let CollectionResponse { + token, + result, + started_at, + time_queued, + time_collecting, + } = response; + match result { + Ok(results) => { + self.stats.collections.datum.increment(); + let n_samples: u64 = results + .iter() + .map(|each| match each { + ProducerResultsItem::Ok(samples) => { + samples.len() as u64 + } + _ => 0, + }) + .sum(); + let success = SuccessfulCollection { + started_at, + time_queued, + time_collecting, + n_samples, + }; + self.details.on_success(success); + if self.outbox.send((token, results)).await.is_err() { + error!( + self.log, + "failed to send results to outbox, channel is \ + closed, exiting", + ); + return TaskAction::Break(()); + } + } + Err(reason) => { + let failure = FailedCollection { + started_at, + time_queued, + time_collecting, + reason: reason.to_string(), + }; + self.details.on_failure(failure); + self.stats.failures_for_reason(reason).datum.increment(); + } + } + TaskAction::Continue(()) + } + + async fn handle_collection_timer_tick(&mut self) -> TaskAction { + match self.timer_collection_tx.try_send(CollectionStartTimes::new()) { + Ok(_) => { + debug!( + self.log, + "sent timer-based collection request to \ + the collection task" + ); + } + Err(TrySendError::Closed(_)) => { + error!( + self.log, + "timer-based collection request queue is \ + closed, exiting" + ); + return TaskAction::Break(()); + } + Err(TrySendError::Full(start)) => { + let failure = FailedCollection { + started_at: start.started_at, + time_queued: Duration::ZERO, + time_collecting: Duration::ZERO, + reason: String::from("collections in progress"), + }; + self.details.on_failure(failure); + error!( + self.log, + "timer-based collection request queue is \ + full! This may indicate that the producer \ + has a sampling interval that is too fast \ + for the amount of data it generates"; + "interval" => ?self.producer_info_tx.borrow().interval, + ); + self.stats + .failures_for_reason( + self_stats::FailureReason::CollectionsInProgress, + ) + .datum + .increment() + } + } + TaskAction::Continue(()) + } +} diff --git a/oximeter/collector/src/http_entrypoints.rs b/oximeter/collector/src/http_entrypoints.rs index 1962262453..61777daf2b 100644 --- a/oximeter/collector/src/http_entrypoints.rs +++ b/oximeter/collector/src/http_entrypoints.rs @@ -52,6 +52,19 @@ impl OximeterApi for OximeterApiImpl { .map(HttpResponseOk) } + async fn producer_details( + request_context: RequestContext<Self::Context>, + path: dropshot::Path<ProducerIdPathParams>, + ) -> Result<HttpResponseOk<ProducerDetails>, HttpError> { + let agent = request_context.context(); + let producer_id = path.into_inner().producer_id; + agent + .producer_details(producer_id) + .await + .map_err(HttpError::from) + .map(HttpResponseOk) + } + async fn producer_delete( request_context: RequestContext<Self::Context>, path: dropshot::Path<ProducerIdPathParams>, diff --git a/oximeter/collector/src/lib.rs b/oximeter/collector/src/lib.rs index cc0ef92c13..54044b0068 100644 --- a/oximeter/collector/src/lib.rs +++ b/oximeter/collector/src/lib.rs @@ -4,9 +4,9 @@ //! Implementation of the `oximeter` metric collection server.
-// Copyright 2023 Oxide Computer Company +// Copyright 2024 Oxide Computer Company -pub use agent::ForcedCollectionError; +pub use collection_task::ForcedCollectionError; use dropshot::ConfigDropshot; use dropshot::ConfigLogging; use dropshot::HttpError; @@ -42,7 +42,9 @@ use thiserror::Error; use uuid::Uuid; mod agent; +mod collection_task; mod http_entrypoints; +mod results_sink; mod self_stats; mod standalone; @@ -65,11 +67,18 @@ pub enum Error { #[error("Error running standalone")] Standalone(#[from] anyhow::Error), + + #[error("No registered producer with id '{id}'")] + NoSuchProducer { id: Uuid }, } impl From<Error> for HttpError { fn from(e: Error) -> Self { - HttpError::for_internal_error(e.to_string()) + if let Error::NoSuchProducer { .. } = e { + HttpError::for_not_found(None, e.to_string()) + } else { + HttpError::for_internal_error(e.to_string()) + } } } diff --git a/oximeter/collector/src/results_sink.rs b/oximeter/collector/src/results_sink.rs new file mode 100644 index 0000000000..3013d472b6 --- /dev/null +++ b/oximeter/collector/src/results_sink.rs @@ -0,0 +1,148 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +//! Tasks acting as sinks for results. +//! +//! This includes the usual task that inserts data into ClickHouse, and a +//! printing task used in `oximeter` standalone. + +// Copyright 2024 Oxide Computer Company + +use crate::collection_task::CollectionTaskOutput; +use oximeter::types::ProducerResultsItem; +use oximeter_db::Client; +use oximeter_db::DbWrite as _; +use slog::debug; +use slog::error; +use slog::info; +use slog::trace; +use slog::warn; +use slog::Logger; +use slog_error_chain::InlineErrorChain; +use std::time::Duration; +use tokio::sync::mpsc; +use tokio::time::interval; + +/// A sink that inserts all results into the ClickHouse database. +/// +/// This sink is used in production, when running the `oximeter` collector +/// normally. It aggregates all results, from all collection tasks, and inserts +/// them into ClickHouse in batches. +pub async fn database_inserter( + log: Logger, + client: Client, + batch_size: usize, + batch_interval: Duration, + mut rx: mpsc::Receiver<CollectionTaskOutput>, +) { + let mut timer = interval(batch_interval); + timer.tick().await; // completes immediately + let mut batch = Vec::with_capacity(batch_size); + loop { + let mut collection_token = None; + let insert = tokio::select!
{ + _ = timer.tick() => { + if batch.is_empty() { + trace!(log, "batch interval expired, but no samples to insert"); + false + } else { + true + } + } + results = rx.recv() => { + match results { + Some((token, results)) => { + let flattened_results = { + let mut flattened = Vec::with_capacity(results.len()); + for inner_batch in results.into_iter() { + match inner_batch { + ProducerResultsItem::Ok(samples) => flattened.extend(samples.into_iter()), + ProducerResultsItem::Err(e) => { + debug!( + log, + "received error (not samples) from a producer: {}", + e.to_string() + ); + } + } + } + flattened + }; + batch.extend(flattened_results); + + collection_token = token; + if collection_token.is_some() { + true + } else { + batch.len() >= batch_size + } + } + None => { + warn!(log, "result queue closed, exiting"); + return; + } + } + } + }; + + if insert { + debug!(log, "inserting {} samples into database", batch.len()); + match client.insert_samples(&batch).await { + Ok(()) => trace!(log, "successfully inserted samples"), + Err(e) => { + warn!( + log, + "failed to insert some results into metric DB: {}", + e.to_string() + ); + } + } + // TODO-correctness The `insert_samples` call above may fail. The method itself needs + // better handling of partially-inserted results in that case, but we may need to retry + // or otherwise handle an error here as well. + // + // See https://github.com/oxidecomputer/omicron/issues/740 for a + // discussion. + batch.clear(); + } + + if let Some(token) = collection_token { + let _ = token.send(Ok(())); + } + } +} + +/// A sink run in `oximeter` standalone that logs results on receipt. +pub async fn logger(log: Logger, mut rx: mpsc::Receiver<CollectionTaskOutput>) { + loop { + match rx.recv().await { + Some((_, results)) => { + for res in results.into_iter() { + match res { + ProducerResultsItem::Ok(samples) => { + for sample in samples.into_iter() { + info!( + log, + ""; + "sample" => ?sample, + ); + } + } + ProducerResultsItem::Err(e) => { + error!( + log, + "received error from a producer"; + InlineErrorChain::new(&e), + ); + } + } + } + } + None => { + debug!(log, "result queue closed, exiting"); + return; + } + } + } +} diff --git a/oximeter/collector/src/self_stats.rs b/oximeter/collector/src/self_stats.rs index 2ab7b201e5..ff8776c031 100644 --- a/oximeter/collector/src/self_stats.rs +++ b/oximeter/collector/src/self_stats.rs @@ -99,6 +99,32 @@ impl CollectionTaskStats { } } + /// Update this information with a new producer endpoint. + /// + /// # Panics + /// + /// This panics if `new_info` refers to a different ID. + pub fn update(&mut self, new_info: &ProducerEndpoint) { + assert_eq!(self.collections.producer_id, new_info.id); + + // Only reset the counters if the new information is actually different.
+ let new_ip = new_info.address.ip(); + let new_port = new_info.address.port(); + if self.collections.producer_ip == new_ip + && self.collections.producer_port == new_port + { + return; + } + self.collections.producer_ip = new_ip; + self.collections.producer_port = new_port; + self.collections.datum = Cumulative::new(0); + for each in self.failed_collections.values_mut() { + each.producer_ip = new_ip; + each.producer_port = new_port; + each.datum = Cumulative::new(0); + } + } + pub fn failures_for_reason( &mut self, reason: FailureReason, @@ -135,18 +161,64 @@ impl CollectionTaskStats { #[cfg(test)] mod tests { + use super::CollectionTaskStats; use super::FailureReason; + use super::OximeterCollector; use super::StatusCode; + use omicron_common::api::internal::nexus::ProducerEndpoint; + use omicron_common::api::internal::nexus::ProducerKind; + use std::time::Duration; + use uuid::Uuid; #[test] fn test_failure_reason_serialization() { let data = &[ - (FailureReason::Deserialization, "deserialization"), - (FailureReason::Unreachable, "unreachable"), + (FailureReason::Deserialization, FailureReason::DESERIALIZATION), + (FailureReason::Unreachable, FailureReason::UNREACHABLE), + ( + FailureReason::CollectionsInProgress, + FailureReason::COLLECTIONS_IN_PROGRESS, + ), (FailureReason::Other(StatusCode::INTERNAL_SERVER_ERROR), "500"), ]; for (variant, as_str) in data.iter() { assert_eq!(variant.to_string(), *as_str); } } + + #[test] + fn only_reset_counters_if_info_is_different() { + let info = ProducerEndpoint { + id: Uuid::new_v4(), + kind: ProducerKind::Service, + address: "[::1]:12345".parse().unwrap(), + interval: Duration::from_secs(1), + }; + let collector = OximeterCollector { + collector_id: Uuid::new_v4(), + collector_ip: "::1".parse().unwrap(), + collector_port: 12345, + }; + let mut stats = CollectionTaskStats::new(collector, &info); + stats.collections.datum.increment(); + + stats.update(&info); + assert_eq!( + stats.collections.datum.value(), + 1, + "Should not have reset the counter when updating \ + with the same producer endpoint information" + ); + let info = ProducerEndpoint { + address: "[::1]:11111".parse().unwrap(), + ..info + }; + stats.update(&info); + assert_eq!( + stats.collections.datum.value(), + 0, + "Should have reset the counter when updating \ + with different producer endpoint information" + ); + } } diff --git a/package-manifest.toml b/package-manifest.toml index 789d1eb0c0..809c1ce6ca 100644 --- a/package-manifest.toml +++ b/package-manifest.toml @@ -578,10 +578,10 @@ only_for_targets.image = "standard" # 3. 
Use source.type = "manual" instead of "prebuilt" source.type = "prebuilt" source.repo = "crucible" -source.commit = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source.commit = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible.sha256.txt -source.sha256 = "0276c1513b33c61c866eb31756879e9d079534f43af90b01c0a2dd152c6ce18d" +source.sha256 = "bcccfb03a68e46bb958410faf6f619e25f5ec9ccc65c503aeb87bb7ad456e517" output.type = "zone" output.intermediate_only = true @@ -590,10 +590,10 @@ service_name = "crucible_pantry_prebuilt" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source.commit = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-pantry.sha256.txt -source.sha256 = "7ad4f84df681f5ccd90bd74473a17a0e1310f562bfd0c08047aad6adbd131903" +source.sha256 = "96326422f79413fe31bb1c7df6173b2991b463cabc5b1fb4182db703500c8882" output.type = "zone" output.intermediate_only = true @@ -607,10 +607,10 @@ service_name = "crucible_dtrace" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "crucible" -source.commit = "2cfc7e0c8572b3bfafbfc838c4e6d658f442d239" +source.commit = "5a41b826171c7d2a8412fa833377ab1df25ee8ec" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/crucible/image//crucible-dtrace.sha256.txt -source.sha256 = "dac88622ecf6e3529b9d83390607c921723eca26de68b0801efd66c36acfa629" +source.sha256 = "d35ed81a1e58ec66b621938f4b57513c1a3eb0b66e21834e000e0ace9624b462" output.type = "tarball" # Refer to @@ -621,10 +621,10 @@ service_name = "propolis-server" only_for_targets.image = "standard" source.type = "prebuilt" source.repo = "propolis" -source.commit = "6936f1a949d155da38d3148abd42caef337dea04" +source.commit = "220a6f367c18f2452dbc4fa9086f3fe73b961739" # The SHA256 digest is automatically posted to: # https://buildomat.eng.oxide.computer/public/file/oxidecomputer/propolis/image//propolis-server.sha256.txt -source.sha256 = "a3a45292bd45938a785b84afee39f690a5f05d1920b78b8fc0512a131857d7ee" +source.sha256 = "964bf262677496118f8cea95c257d0a57c76ddca70733217b0666657b53bd6e6" output.type = "zone" [package.mg-ddm-gz] diff --git a/package/src/bin/omicron-package.rs b/package/src/bin/omicron-package.rs index c828c90432..cc4050cbce 100644 --- a/package/src/bin/omicron-package.rs +++ b/package/src/bin/omicron-package.rs @@ -108,6 +108,7 @@ struct Args { #[derive(Debug, Default)] struct CargoPlan<'a> { command: &'a str, + packages: BTreeSet<&'a String>, bins: BTreeSet<&'a String>, features: BTreeSet<&'a String>, release: bool, @@ -123,6 +124,12 @@ impl<'a> CargoPlan<'a> { // We rely on the rust-toolchain.toml file for toolchain information, // rather than specifying one within the packaging tool. cmd.arg(self.command); + // We specify _both_ --package and --bin; --bin does not imply + // --package, and without any --package options Cargo unifies features + // across all workspace default members. See rust-lang/cargo#8157. 
+ for package in &self.packages { + cmd.arg("--package").arg(package); + } for bin in &self.bins { cmd.arg("--bin").arg(bin); } @@ -185,9 +192,12 @@ async fn do_for_all_rust_packages( let mut debug = CargoPlan { command, release: false, ..Default::default() }; for (name, pkg) in config.packages_to_build().0 { - // If this is a Rust package... + // If this is a Rust package, `name` (the map key) is the name of the + // corresponding Rust crate. if let PackageSource::Local { rust: Some(rust_pkg), .. } = &pkg.source { let plan = if rust_pkg.release { &mut release } else { &mut debug }; + // Add the package name to the plan + plan.packages.insert(name); // Get the package metadata let metadata = workspace_pkgs.get(name).with_context(|| { format!("package '{name}' is not a workspace package") diff --git a/tools/console_version b/tools/console_version index 85363dcf35..08078c264e 100644 --- a/tools/console_version +++ b/tools/console_version @@ -1,2 +1,2 @@ -COMMIT="059c55142ba29e4f691247def92493b7ef3f8df2" -SHA2="dfb98bf8eb4f97ff65be8128a456dbbf98dfbda48988a4b683bf0894d57381ba" +COMMIT="927c8b63a6f97c230cd8766a80fa1cfef6429eb4" +SHA2="96550b6e485aaee1c6ced00a4a1aeec86267c99fc79a4b2b253141cf0222d346" diff --git a/tools/opte_version b/tools/opte_version index adc2ee64e8..a80da921ae 100644 --- a/tools/opte_version +++ b/tools/opte_version @@ -1 +1 @@ -0.34.301 +0.34.311 diff --git a/tools/opte_version_override b/tools/opte_version_override index 0a98b51ca1..6ababee9f1 100644 --- a/tools/opte_version_override +++ b/tools/opte_version_override @@ -2,4 +2,4 @@ # only set this if you want to override the version of opte/xde installed by the # install_opte.sh script -OPTE_COMMIT="98247c27846133a80fdb8f730f0c57e72d766561" +OPTE_COMMIT="b56afeeb14e0042cbd7bda85b166ed86ee17820e" diff --git a/tools/permslip_production b/tools/permslip_production index ce73c3c6da..5c84faad8f 100644 --- a/tools/permslip_production +++ b/tools/permslip_production @@ -1,2 +1,2 @@ a72a5f931bcfd3d931df407fbbba6d851165c4637adf39568a94f755966b6c9c manifest-oxide-rot-1-v1.0.30.toml -610ebce44b1fb622eb56591534fb2569340fdba9b5ba62ca1b02f0b2d2e973dc manifest-bootleby-v1.3.1.toml +9d5faa910e8e8e7aaeb74df972badcdf371615d4bbabdb9ddccf4d0d32517f7d manifest-bootleby-v1.3.3.toml diff --git a/tools/permslip_staging b/tools/permslip_staging index 9c413ddc6e..146a9d615e 100644 --- a/tools/permslip_staging +++ b/tools/permslip_staging @@ -1,5 +1,5 @@ c33a381e716127e05da928c39b3a4d5f5278e43f526ff8c5c817708c378a5c87 manifest-gimlet-v1.0.32.toml 2cda350adba506b3ab67813db932d07c7a7836b5731d5351e57d49302f41dbf4 manifest-oxide-rot-1-v1.0.30.toml 70de21757b47e3e6c15d4c8701efe80e8cc90125afdd2883ff160045aed20956 manifest-psc-v1.0.31.toml -499ee08eb77ed3600564239f3f3efdcf79f122ffc4b93b168790c24358ae1e3c manifest-sidecar-v1.0.31.toml -6f8459afe22c27d5920356878e4d8d639464f39a15ce7b5b040c2d908d52a570 manifest-bootleby-v1.3.1.toml +222ae9df38699037b75e98eb7a8b441f6cda958b8a79e57e72e410b054f1d8eb manifest-sidecar-v1.0.32.toml +14c20540fe785dea65ef03446d5c4665a5f3d9106eb176691b35646faa54f61f manifest-bootleby-v1.3.3.toml diff --git a/workspace-hack/Cargo.toml b/workspace-hack/Cargo.toml index b0bf8858d5..31677ed8c1 100644 --- a/workspace-hack/Cargo.toml +++ b/workspace-hack/Cargo.toml @@ -62,15 +62,14 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.15", default-features = false, features = 
["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.15.0" } +hashbrown = { version = "0.15.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.5.0", features = ["full"] } indexmap = { version = "2.6.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } -itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } -itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2.162", features = ["extra_traits"] } @@ -101,7 +100,7 @@ regex-automata = { version = "0.4.8", default-features = false, features = ["dfa regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.9", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } rsa = { version = "0.9.6", features = ["serde", "sha2"] } -rustls = { version = "0.23.14", features = ["ring"] } +rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" } @@ -182,15 +181,14 @@ gateway-messages = { git = "https://github.com/oxidecomputer/management-gateway- generic-array = { version = "0.14.7", default-features = false, features = ["more_lengths", "zeroize"] } getrandom = { version = "0.2.15", default-features = false, features = ["js", "rdrand", "std"] } group = { version = "0.13.0", default-features = false, features = ["alloc"] } -hashbrown = { version = "0.15.0" } +hashbrown = { version = "0.15.1" } hex = { version = "0.4.3", features = ["serde"] } hickory-proto = { version = "0.24.1", features = ["text-parsing"] } hmac = { version = "0.12.1", default-features = false, features = ["reset"] } hyper = { version = "1.5.0", features = ["full"] } indexmap = { version = "2.6.0", features = ["serde"] } inout = { version = "0.1.3", default-features = false, features = ["std"] } -itertools-5ef9efb8ec2df382 = { package = "itertools", version = "0.12.1" } -itertools-93f6ce9d446188ac = { package = "itertools", version = "0.10.5" } +itertools = { version = "0.10.5" } lalrpop-util = { version = "0.19.12" } lazy_static = { version = "1.5.0", default-features = false, features = ["spin_no_std"] } libc = { version = "0.2.162", features = ["extra_traits"] } @@ -221,7 +219,7 @@ regex-automata = { version = "0.4.8", default-features = false, features = ["dfa regex-syntax = { version = "0.8.5" } reqwest = { version = "0.12.9", features = ["blocking", "cookies", "json", "rustls-tls", "stream"] } rsa = { version = "0.9.6", features = ["serde", "sha2"] } -rustls = { version = "0.23.14", features = ["ring"] } +rustls = { version = "0.23.19", features = ["ring"] } rustls-webpki = { version = "0.102.8", default-features = false, features = ["aws_lc_rs", "ring", "std"] } schemars = { version = "0.8.21", features = ["bytes", "chrono", "uuid1"] } scopeguard = { version = "1.2.0" }