From f2ea80014b390575be74a8b4e2b1d0d714feee0b Mon Sep 17 00:00:00 2001 From: John Gallagher Date: Mon, 1 Apr 2024 10:19:46 -0400 Subject: [PATCH] [Reconfigurator] Introduce `PlanningInput` that includes external networking information from CRDB (#5344) The primary change in this PR is that the blueprint planner now wants a `PlanningInput` (which contains a `Policy`) instead of a `Policy`. The bulk of the diff is adding new `DataStore` methods (and tests for them) that fetch all currently-allocated external IPs and NICs for services. Some incidental changes that came along for the ride that I hope are not controversial, but could be backed out if they are: * `nexus_db_model::NetworkInterface::slot` is now a `SqlU8` instead of an `i16`. I didn't have to change the queries here, so I think they're still converting this to an `i16`, which is probably okay? I could make a pass on them if needed though. * I added an `omicron_uuid_kinds::ServiceKind` and started using it in this PR. I did not attempt to make a pass through all service UUIDs to start using this; I think this can be done incrementally? Other notes: * I'm not sure about the name `PlanningInput`. It feels vague; isn't every argument to the planner a kind of "planning input"? But I'm not sure what else to call "`Policy` plus extra CRDB state". * This does not change execution at all. It's possible when I get to that there will need to be some changes here, but I think this is probably close enough that it can be reviewed, and any changes will be small and can be rolled into the execution work. 
--- Cargo.lock | 4 + dev-tools/omdb/src/bin/omdb/db.rs | 4 +- dev-tools/reconfigurator-cli/Cargo.toml | 1 + dev-tools/reconfigurator-cli/src/main.rs | 67 +++++- nexus/db-model/src/external_ip.rs | 6 + nexus/db-model/src/network_interface.rs | 25 ++- .../db-queries/src/db/datastore/deployment.rs | 55 +++-- .../src/db/datastore/external_ip.rs | 175 ++++++++++++++- .../src/db/datastore/network_interface.rs | 205 ++++++++++++++++++ .../db-queries/src/db/queries/external_ip.rs | 46 ++-- .../src/db/queries/network_interface.rs | 7 +- nexus/reconfigurator/execution/src/dns.rs | 14 +- .../execution/src/resource_allocation.rs | 32 ++- nexus/reconfigurator/planning/Cargo.toml | 1 + .../planning/src/blueprint_builder.rs | 71 +++--- nexus/reconfigurator/planning/src/example.rs | 54 ++++- nexus/reconfigurator/planning/src/planner.rs | 87 ++++---- nexus/src/app/deployment.rs | 58 ++++- nexus/types/Cargo.toml | 2 + nexus/types/src/deployment.rs | 46 ++++ uuid-kinds/src/lib.rs | 1 + 21 files changed, 793 insertions(+), 168 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 4548d0a3d7..3959eef5e3 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -4827,6 +4827,7 @@ dependencies = [ "nexus-types", "omicron-common", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "rand 0.8.5", "sled-agent-client", @@ -4928,6 +4929,8 @@ dependencies = [ "futures", "gateway-client", "humantime", + "ipnetwork", + "newtype-uuid", "omicron-common", "omicron-passwords", "omicron-uuid-kinds", @@ -7482,6 +7485,7 @@ dependencies = [ "omicron-nexus", "omicron-rpaths", "omicron-test-utils", + "omicron-uuid-kinds", "omicron-workspace-hack", "pq-sys", "reedline", diff --git a/dev-tools/omdb/src/bin/omdb/db.rs b/dev-tools/omdb/src/bin/omdb/db.rs index 30473fccd4..5d9cb594ca 100644 --- a/dev-tools/omdb/src/bin/omdb/db.rs +++ b/dev-tools/omdb/src/bin/omdb/db.rs @@ -2120,7 +2120,7 @@ async fn cmd_db_network_list_vnics( struct NicRow { ip: IpNetwork, mac: MacAddr, - slot: i16, + slot: u8, 
primary: bool, kind: &'static str, subnet: String, @@ -2241,7 +2241,7 @@ async fn cmd_db_network_list_vnics( let row = NicRow { ip: nic.ip, mac: *nic.mac, - slot: nic.slot, + slot: *nic.slot, primary: nic.primary, kind, subnet, diff --git a/dev-tools/reconfigurator-cli/Cargo.toml b/dev-tools/reconfigurator-cli/Cargo.toml index cae07ec9b6..ad3cdf61f1 100644 --- a/dev-tools/reconfigurator-cli/Cargo.toml +++ b/dev-tools/reconfigurator-cli/Cargo.toml @@ -20,6 +20,7 @@ nexus-reconfigurator-planning.workspace = true nexus-reconfigurator-execution.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true # See omicron-rpaths for more about the "pq-sys" dependency. pq-sys = "*" reedline.workspace = true diff --git a/dev-tools/reconfigurator-cli/src/main.rs b/dev-tools/reconfigurator-cli/src/main.rs index cef5c3c63f..08755a4537 100644 --- a/dev-tools/reconfigurator-cli/src/main.rs +++ b/dev-tools/reconfigurator-cli/src/main.rs @@ -20,6 +20,9 @@ use nexus_reconfigurator_planning::planner::Planner; use nexus_reconfigurator_planning::system::{ SledBuilder, SledHwInventory, SystemDescription, }; +use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::deployment::{Blueprint, UnstableReconfiguratorState}; use nexus_types::internal_api::params::DnsConfigParams; use nexus_types::inventory::Collection; @@ -27,9 +30,12 @@ use nexus_types::inventory::OmicronZonesConfig; use nexus_types::inventory::SledRole; use omicron_common::api::external::Generation; use omicron_common::api::external::Name; +use omicron_uuid_kinds::{GenericUuid, OmicronZoneKind, TypedUuid}; use reedline::{Reedline, Signal}; +use std::cell::RefCell; use std::collections::BTreeMap; use std::io::BufRead; +use std::net::IpAddr; use swrite::{swriteln, SWrite}; use tabled::Tabled; use uuid::Uuid; @@ -50,6 +56,14 @@ struct ReconfiguratorSim { /// blueprints created 
by the user blueprints: IndexMap, + /// external IPs allocated to services + /// + /// In the real system, external IPs have IDs, but those IDs only live in + /// CRDB - they're not part of the zone config sent from Reconfigurator to + /// sled-agent. This mimics the minimal bit of the CRDB `external_ip` table + /// we need. + external_ips: RefCell>, + /// internal DNS configurations internal_dns: BTreeMap, /// external DNS configurations @@ -92,6 +106,49 @@ impl ReconfiguratorSim { let _ = entry.or_insert(blueprint); Ok(()) } + + fn planning_input( + &self, + parent_blueprint: &Blueprint, + ) -> anyhow::Result { + let policy = self.system.to_policy().context("generating policy")?; + let service_external_ips = parent_blueprint + .all_omicron_zones() + .filter_map(|(_, zone)| { + let Ok(Some(ip)) = zone.zone_type.external_ip() else { + return None; + }; + let service_id = + TypedUuid::::from_untyped_uuid(zone.id); + let external_ip = ExternalIp { + id: *self + .external_ips + .borrow_mut() + .entry(ip) + .or_insert_with(Uuid::new_v4), + ip: ip.into(), + }; + Some((service_id, external_ip)) + }) + .collect(); + let service_nics = parent_blueprint + .all_omicron_zones() + .filter_map(|(_, zone)| { + let nic = zone.zone_type.service_vnic()?; + let service_id = + TypedUuid::::from_untyped_uuid(zone.id); + let nic = ServiceNetworkInterface { + id: nic.id, + mac: nic.mac, + ip: nic.ip.into(), + slot: nic.slot, + primary: nic.primary, + }; + Some((service_id, nic)) + }) + .collect(); + Ok(PlanningInput { policy, service_external_ips, service_nics }) + } } /// interactive REPL for exploring the planner @@ -115,6 +172,7 @@ fn main() -> anyhow::Result<()> { system: SystemDescription::new(), collections: IndexMap::new(), blueprints: IndexMap::new(), + external_ips: RefCell::new(IndexMap::new()), internal_dns: BTreeMap::new(), external_dns: BTreeMap::new(), log, @@ -655,9 +713,8 @@ fn cmd_blueprint_plan( .collections .get(&collection_id) .ok_or_else(|| anyhow!("no such 
collection: {}", collection_id))?; - let policy = sim.system.to_policy().context("generating policy")?; let creator = "reconfigurator-sim"; - + let planning_input = sim.planning_input(parent_blueprint)?; let planner = Planner::new_based_on( sim.log.clone(), parent_blueprint, @@ -688,7 +745,7 @@ fn cmd_blueprint_plan( // matter, either. We'll just pick the parent blueprint's. parent_blueprint.internal_dns_version, parent_blueprint.external_dns_version, - &policy, + &planning_input, creator, collection, ) @@ -709,13 +766,13 @@ fn cmd_blueprint_edit( let blueprint_id = args.blueprint_id; let blueprint = sim.blueprint_lookup(blueprint_id)?; let creator = args.creator.as_deref().unwrap_or("reconfigurator-cli"); - let policy = sim.system.to_policy().context("assembling policy")?; + let planning_input = sim.planning_input(blueprint)?; let mut builder = BlueprintBuilder::new_based_on( &sim.log, &blueprint, blueprint.internal_dns_version, blueprint.external_dns_version, - &policy, + &planning_input, creator, ) .context("creating blueprint builder")?; diff --git a/nexus/db-model/src/external_ip.rs b/nexus/db-model/src/external_ip.rs index 337e7ef2a7..f290fdcd0f 100644 --- a/nexus/db-model/src/external_ip.rs +++ b/nexus/db-model/src/external_ip.rs @@ -130,6 +130,12 @@ pub struct ExternalIp { pub is_probe: bool, } +impl From for nexus_types::deployment::ExternalIp { + fn from(ext_ip: ExternalIp) -> Self { + Self { id: ext_ip.id, ip: ext_ip.ip } + } +} + /// A view type constructed from `ExternalIp` used to represent Floating IP /// objects in user-facing APIs. 
/// diff --git a/nexus/db-model/src/network_interface.rs b/nexus/db-model/src/network_interface.rs index fdcfcbf588..a632772043 100644 --- a/nexus/db-model/src/network_interface.rs +++ b/nexus/db-model/src/network_interface.rs @@ -8,6 +8,7 @@ use crate::schema::instance_network_interface; use crate::schema::network_interface; use crate::schema::service_network_interface; use crate::Name; +use crate::SqlU8; use chrono::DateTime; use chrono::Utc; use db_macros::Resource; @@ -59,7 +60,7 @@ pub struct NetworkInterface { // If neither is specified, auto-assign one of each? pub ip: ipnetwork::IpNetwork, - pub slot: i16, + pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, } @@ -91,10 +92,10 @@ impl NetworkInterface { name: self.name().clone(), ip: self.ip.ip(), mac: self.mac.into(), - subnet: subnet, + subnet, vni: external::Vni::try_from(0).unwrap(), primary: self.primary, - slot: self.slot.try_into().unwrap(), + slot: *self.slot, } } } @@ -117,7 +118,7 @@ pub struct InstanceNetworkInterface { pub mac: MacAddr, pub ip: ipnetwork::IpNetwork, - pub slot: i16, + pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, } @@ -140,11 +141,25 @@ pub struct ServiceNetworkInterface { pub mac: MacAddr, pub ip: ipnetwork::IpNetwork, - pub slot: i16, + pub slot: SqlU8, #[diesel(column_name = is_primary)] pub primary: bool, } +impl From + for nexus_types::deployment::ServiceNetworkInterface +{ + fn from(nic: ServiceNetworkInterface) -> Self { + Self { + id: nic.id(), + mac: *nic.mac, + ip: nic.ip, + slot: *nic.slot, + primary: nic.primary, + } + } +} + impl NetworkInterface { /// Treat this `NetworkInterface` as an `InstanceNetworkInterface`. 
/// diff --git a/nexus/db-queries/src/db/datastore/deployment.rs b/nexus/db-queries/src/db/datastore/deployment.rs index 8f6b9abf58..b04dc9a03d 100644 --- a/nexus/db-queries/src/db/datastore/deployment.rs +++ b/nexus/db-queries/src/db/datastore/deployment.rs @@ -1182,6 +1182,7 @@ mod tests { use nexus_reconfigurator_planning::blueprint_builder::BlueprintBuilder; use nexus_reconfigurator_planning::blueprint_builder::Ensure; use nexus_test_utils::db::test_setup_database; + use nexus_types::deployment::PlanningInput; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::external_api::views::SledPolicy; @@ -1196,10 +1197,14 @@ mod tests { use std::mem; use std::net::Ipv6Addr; - static EMPTY_POLICY: Policy = Policy { - sleds: BTreeMap::new(), - service_ip_pool_ranges: Vec::new(), - target_nexus_zone_count: 0, + static EMPTY_PLANNING_INPUT: PlanningInput = PlanningInput { + policy: Policy { + sleds: BTreeMap::new(), + service_ip_pool_ranges: Vec::new(), + target_nexus_zone_count: 0, + }, + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), }; // This is a not-super-future-maintainer-friendly helper to check that all @@ -1283,7 +1288,7 @@ mod tests { } } - fn representative() -> (Collection, Policy, Blueprint) { + fn representative() -> (Collection, PlanningInput, Blueprint) { // We'll start with a representative collection... 
let mut collection = nexus_inventory::examples::representative().builder.build(); @@ -1307,16 +1312,21 @@ mod tests { } let policy = policy_from_collection(&collection); + let planning_input = PlanningInput { + policy, + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), + }; let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), Generation::new(), - &policy, + &planning_input.policy, "test", ) .unwrap(); - (collection, policy, blueprint) + (collection, planning_input, blueprint) } async fn blueprint_list_all_ids( @@ -1346,7 +1356,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT.policy, "test", ) .unwrap(); @@ -1402,7 +1412,7 @@ mod tests { let (opctx, datastore) = datastore_test(&logctx, &db).await; // Create a cohesive representative collection/policy/blueprint - let (collection, mut policy, blueprint1) = representative(); + let (collection, mut planning_input, blueprint1) = representative(); let authz_blueprint1 = authz_blueprint_from_id(blueprint1.id); // Write it to the database and read it back. @@ -1421,7 +1431,10 @@ mod tests { ); // Check the number of blueprint elements against our collection. - assert_eq!(blueprint1.blueprint_zones.len(), policy.sleds.len()); + assert_eq!( + blueprint1.blueprint_zones.len(), + planning_input.policy.sleds.len() + ); assert_eq!( blueprint1.blueprint_zones.len(), collection.omicron_zones.len() @@ -1463,8 +1476,12 @@ mod tests { // Add a new sled to `policy`. let new_sled_id = Uuid::new_v4(); - policy.sleds.insert(new_sled_id, fake_sled_resources(None)); - let new_sled_zpools = &policy.sleds.get(&new_sled_id).unwrap().zpools; + planning_input + .policy + .sleds + .insert(new_sled_id, fake_sled_resources(None)); + let new_sled_zpools = + &planning_input.policy.sleds.get(&new_sled_id).unwrap().zpools; // Create a builder for a child blueprint. 
While we're at it, use a // different DNS version to test that that works. @@ -1475,7 +1492,7 @@ mod tests { &blueprint1, new_internal_dns_version, new_external_dns_version, - &policy, + &planning_input, "test", ) .expect("failed to create builder"); @@ -1621,7 +1638,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT.policy, "test1", ) .unwrap(); @@ -1630,7 +1647,7 @@ mod tests { &blueprint1, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test2", ) .expect("failed to create builder") @@ -1640,7 +1657,7 @@ mod tests { &blueprint1, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test3", ) .expect("failed to create builder") @@ -1738,7 +1755,7 @@ mod tests { &blueprint3, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test3", ) .expect("failed to create builder") @@ -1778,7 +1795,7 @@ mod tests { &collection, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT.policy, "test1", ) .unwrap(); @@ -1787,7 +1804,7 @@ mod tests { &blueprint1, Generation::new(), Generation::new(), - &EMPTY_POLICY, + &EMPTY_PLANNING_INPUT, "test2", ) .expect("failed to create builder") diff --git a/nexus/db-queries/src/db/datastore/external_ip.rs b/nexus/db-queries/src/db/datastore/external_ip.rs index 017d2f22d2..cc5ddc50d5 100644 --- a/nexus/db-queries/src/db/datastore/external_ip.rs +++ b/nexus/db-queries/src/db/datastore/external_ip.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`ExternalIp`]s. 
use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::authz::ApiResource; use crate::context::OpContext; @@ -24,6 +25,7 @@ use crate::db::model::IncompleteExternalIp; use crate::db::model::IpKind; use crate::db::model::Name; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::queries::external_ip::NextExternalIp; use crate::db::queries::external_ip::MAX_EXTERNAL_IPS_PER_INSTANCE; @@ -41,6 +43,7 @@ use nexus_db_model::IpAttachState; use nexus_types::identity::Resource; use omicron_common::api::external::http_pagination::PaginatedBy; use omicron_common::api::external::CreateResult; +use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::IdentityMetadataCreateParams; @@ -206,7 +209,7 @@ impl DataStore { } /// Fetch all external IP addresses of any kind for the provided service. - pub async fn service_lookup_external_ips( + pub async fn external_ip_list_service( &self, opctx: &OpContext, service_id: Uuid, @@ -223,7 +226,7 @@ impl DataStore { } /// Allocates an IP address for internal service usage. - pub async fn allocate_service_ip( + pub async fn external_ip_allocate_service( &self, opctx: &OpContext, ip_id: Uuid, @@ -244,7 +247,7 @@ impl DataStore { } /// Allocates an SNAT IP address for internal service usage. - pub async fn allocate_service_snat_ip( + pub async fn external_ip_allocate_service_snat( &self, opctx: &OpContext, ip_id: Uuid, @@ -384,7 +387,7 @@ impl DataStore { /// /// Unlike the other IP allocation requests, this does not search for an /// available IP address, it asks for one explicitly. 
- pub async fn allocate_explicit_service_ip( + pub async fn external_ip_allocate_service_explicit( &self, opctx: &OpContext, ip_id: Uuid, @@ -410,7 +413,7 @@ impl DataStore { /// /// Unlike the other IP allocation requests, this does not search for an /// available IP address, it asks for one explicitly. - pub async fn allocate_explicit_service_snat_ip( + pub async fn external_ip_allocate_service_explicit_snat( &self, opctx: &OpContext, ip_id: Uuid, @@ -430,6 +433,50 @@ impl DataStore { self.allocate_external_ip(opctx, data).await } + /// List one page of all external IPs allocated to internal services + pub async fn external_ip_list_service_all( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::external_ip::dsl; + + let (authz_pool, _pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz_pool).await?; + + paginated(dsl::external_ip, dsl::id, pagparams) + .filter(dsl::is_service) + .filter(dsl::time_deleted.is_null()) + .select(ExternalIp::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List all external IPs allocated to internal services, making as many + /// queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. 
+ pub async fn external_ip_list_service_all_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + + let mut all_ips = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .external_ip_list_service_all(opctx, &p.current_pagparams()) + .await?; + paginator = p.found_batch(&batch, &|ip: &ExternalIp| ip.id); + all_ips.extend(batch); + } + Ok(all_ips) + } + /// Attempt to move a target external IP from detached to attaching, /// checking that its parent instance does not have too many addresses /// and is in a valid state. @@ -1163,3 +1210,121 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::test_utils::datastore_test; + use nexus_test_utils::db::test_setup_database; + use nexus_types::external_api::shared::IpRange; + use omicron_common::address::NUM_SOURCE_NAT_PORTS; + use omicron_test_utils::dev; + use std::collections::BTreeSet; + use std::net::Ipv4Addr; + + async fn read_all_service_ips( + datastore: &DataStore, + opctx: &OpContext, + ) -> Vec { + let all_batched = datastore + .external_ip_list_service_all_batched(opctx) + .await + .expect("failed to fetch all service IPs batched"); + let all_paginated = datastore + .external_ip_list_service_all(opctx, &DataPageParams::max_page()) + .await + .expect("failed to fetch all service IPs paginated"); + assert_eq!(all_batched, all_paginated); + all_batched + } + + #[tokio::test] + async fn test_service_ip_list() { + usdt::register_probes().unwrap(); + let logctx = dev::test_setup_log("test_service_ip_list"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // No IPs, to start + let ips = read_all_service_ips(&datastore, &opctx).await; + assert_eq!(ips, vec![]); + + // Set up service IP pool range + let ip_range = 
IpRange::try_from(( + Ipv4Addr::new(10, 0, 0, 1), + Ipv4Addr::new(10, 0, 0, 10), + )) + .unwrap(); + let (service_ip_pool, _) = datastore + .ip_pools_service_lookup(&opctx) + .await + .expect("lookup service ip pool"); + datastore + .ip_pool_add_range(&opctx, &service_ip_pool, &ip_range) + .await + .expect("add range to service ip pool"); + + // Allocate a bunch of fake service IPs. + let mut external_ips = Vec::new(); + let mut allocate_snat = false; // flip-flop between regular and snat + for (i, ip) in ip_range.iter().enumerate() { + let name = format!("service-ip-{i}"); + let external_ip = if allocate_snat { + datastore + .external_ip_allocate_service_explicit_snat( + &opctx, + Uuid::new_v4(), + Uuid::new_v4(), + ip, + (0, NUM_SOURCE_NAT_PORTS - 1), + ) + .await + .expect("failed to allocate service IP") + } else { + datastore + .external_ip_allocate_service_explicit( + &opctx, + Uuid::new_v4(), + &Name(name.parse().unwrap()), + &name, + Uuid::new_v4(), + ip, + ) + .await + .expect("failed to allocate service IP") + }; + external_ips.push(external_ip); + allocate_snat = !allocate_snat; + } + external_ips.sort_by_key(|ip| ip.id); + + // Ensure we see them all. + let ips = read_all_service_ips(&datastore, &opctx).await; + assert_eq!(ips, external_ips); + + // Deallocate a few, and ensure we don't see them anymore. + let mut removed_ip_ids = BTreeSet::new(); + for (i, external_ip) in external_ips.iter().enumerate() { + if i % 3 == 0 { + let id = external_ip.id; + datastore + .deallocate_external_ip(&opctx, id) + .await + .expect("failed to deallocate IP"); + removed_ip_ids.insert(id); + } + } + + // Check that we removed at least one, then prune them from our list of + // expected IPs. + assert!(!removed_ip_ids.is_empty()); + external_ips.retain(|ip| !removed_ip_ids.contains(&ip.id)); + + // Ensure we see them all remaining IPs. 
+ let ips = read_all_service_ips(&datastore, &opctx).await; + assert_eq!(ips, external_ips); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/datastore/network_interface.rs b/nexus/db-queries/src/db/datastore/network_interface.rs index 1bccca4e97..795c973407 100644 --- a/nexus/db-queries/src/db/datastore/network_interface.rs +++ b/nexus/db-queries/src/db/datastore/network_interface.rs @@ -5,6 +5,7 @@ //! [`DataStore`] methods on [`NetworkInterface`]s. use super::DataStore; +use super::SQL_BATCH_SIZE; use crate::authz; use crate::context::OpContext; use crate::db; @@ -22,6 +23,7 @@ use crate::db::model::NetworkInterfaceKind; use crate::db::model::NetworkInterfaceUpdate; use crate::db::model::VpcSubnet; use crate::db::pagination::paginated; +use crate::db::pagination::Paginator; use crate::db::pool::DbConnection; use crate::db::queries::network_interface; use crate::transaction_retry::OptionalError; @@ -30,8 +32,10 @@ use chrono::Utc; use diesel::prelude::*; use diesel::result::Error as DieselError; use nexus_db_model::ServiceNetworkInterface; +use nexus_types::identity::Resource; use omicron_common::api::external; use omicron_common::api::external::http_pagination::PaginatedBy; +use omicron_common::api::external::DataPageParams; use omicron_common::api::external::DeleteResult; use omicron_common::api::external::Error; use omicron_common::api::external::ListResultVec; @@ -169,6 +173,58 @@ impl DataStore { .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) } + /// List one page of all network interfaces associated with internal services + pub async fn service_network_interfaces_all_list( + &self, + opctx: &OpContext, + pagparams: &DataPageParams<'_, Uuid>, + ) -> ListResultVec { + use db::schema::service_network_interface::dsl; + + // See the comment in `service_create_network_interface`. 
There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require ListChildren on the + // instance to list). As a logical proxy, we check for listing children + // of the service IP pool. + let (authz_pool, _pool) = self.ip_pools_service_lookup(opctx).await?; + opctx.authorize(authz::Action::ListChildren, &authz_pool).await?; + + paginated(dsl::service_network_interface, dsl::id, pagparams) + .filter(dsl::time_deleted.is_null()) + .select(ServiceNetworkInterface::as_select()) + .get_results_async(&*self.pool_connection_authorized(opctx).await?) + .await + .map_err(|e| public_error_from_diesel(e, ErrorHandler::Server)) + } + + /// List all network interfaces associated with internal services, making as + /// many queries as needed to get them all + /// + /// This should generally not be used in API handlers or other + /// latency-sensitive contexts, but it can make sense in saga actions or + /// background tasks. + pub async fn service_network_interfaces_all_list_batched( + &self, + opctx: &OpContext, + ) -> ListResultVec { + opctx.check_complex_operations_allowed()?; + + let mut all_ips = Vec::new(); + let mut paginator = Paginator::new(SQL_BATCH_SIZE); + while let Some(p) = paginator.next() { + let batch = self + .service_network_interfaces_all_list( + opctx, + &p.current_pagparams(), + ) + .await?; + paginator = p + .found_batch(&batch, &|nic: &ServiceNetworkInterface| nic.id()); + all_ips.extend(batch); + } + Ok(all_ips) + } + /// Create a network interface attached to the provided service zone. pub async fn service_create_network_interface( &self, @@ -345,6 +401,47 @@ impl DataStore { Ok(()) } + /// Delete a `ServiceNetworkInterface` attached to a provided service. + pub async fn service_delete_network_interface( + &self, + opctx: &OpContext, + service_id: Uuid, + network_interface_id: Uuid, + ) -> Result<(), network_interface::DeleteError> { + // See the comment in `service_create_network_interface`. 
There's no + // obvious parent for a service network interface (as opposed to + // instance network interfaces, which require permissions on the + // instance). As a logical proxy, we check for listing children of the + // service IP pool. + let (authz_service_ip_pool, _) = self + .ip_pools_service_lookup(opctx) + .await + .map_err(network_interface::DeleteError::External)?; + opctx + .authorize(authz::Action::Delete, &authz_service_ip_pool) + .await + .map_err(network_interface::DeleteError::External)?; + + let query = network_interface::DeleteQuery::new( + NetworkInterfaceKind::Service, + service_id, + network_interface_id, + ); + query + .clone() + .execute_async( + &*self + .pool_connection_authorized(opctx) + .await + .map_err(network_interface::DeleteError::External)?, + ) + .await + .map_err(|e| { + network_interface::DeleteError::from_diesel(e, &query) + })?; + Ok(()) + } + /// Return information about network interfaces required for the sled /// agent to instantiate or modify them via OPTE. 
This function takes /// a partially constructed query over the network interface table so @@ -688,3 +785,111 @@ impl DataStore { }) } } + +#[cfg(test)] +mod tests { + use super::*; + use crate::db::datastore::test_utils::datastore_test; + use crate::db::fixed_data::vpc_subnet::NEXUS_VPC_SUBNET; + use nexus_config::NUM_INITIAL_RESERVED_IP_ADDRESSES; + use nexus_test_utils::db::test_setup_database; + use omicron_common::address::NEXUS_OPTE_IPV4_SUBNET; + use omicron_test_utils::dev; + use std::collections::BTreeSet; + + async fn read_all_service_nics( + datastore: &DataStore, + opctx: &OpContext, + ) -> Vec { + let all_batched = datastore + .service_network_interfaces_all_list_batched(opctx) + .await + .expect("failed to fetch all service NICs batched"); + let all_paginated = datastore + .service_network_interfaces_all_list( + opctx, + &DataPageParams::max_page(), + ) + .await + .expect("failed to fetch all service NICs paginated"); + assert_eq!(all_batched, all_paginated); + all_batched + } + + #[tokio::test] + async fn test_service_network_interfaces_list() { + usdt::register_probes().unwrap(); + let logctx = + dev::test_setup_log("test_service_network_interfaces_list"); + let mut db = test_setup_database(&logctx.log).await; + let (opctx, datastore) = datastore_test(&logctx, &db).await; + + // No IPs, to start + let nics = read_all_service_nics(&datastore, &opctx).await; + assert_eq!(nics, vec![]); + + // Insert 10 Nexus NICs + let ip_range = NEXUS_OPTE_IPV4_SUBNET + .0 + .iter() + .skip(NUM_INITIAL_RESERVED_IP_ADDRESSES) + .take(10); + let mut macs = external::MacAddr::iter_system(); + let mut service_nics = Vec::new(); + for (i, ip) in ip_range.enumerate() { + let name = format!("service-nic-{i}"); + let interface = IncompleteNetworkInterface::new_service( + Uuid::new_v4(), + Uuid::new_v4(), + NEXUS_VPC_SUBNET.clone(), + external::IdentityMetadataCreateParams { + name: name.parse().unwrap(), + description: name, + }, + ip.into(), + macs.next().unwrap(), + 0, + ) 
+ .unwrap(); + let nic = datastore + .service_create_network_interface(&opctx, interface) + .await + .expect("failed to insert service nic"); + service_nics.push(nic); + } + service_nics.sort_by_key(|nic| nic.id()); + + // Ensure we see them all. + let nics = read_all_service_nics(&datastore, &opctx).await; + assert_eq!(nics, service_nics); + + // Delete a few, and ensure we don't see them anymore. + let mut removed_nic_ids = BTreeSet::new(); + for (i, nic) in service_nics.iter().enumerate() { + if i % 3 == 0 { + let id = nic.id(); + datastore + .service_delete_network_interface( + &opctx, + nic.service_id, + id, + ) + .await + .expect("failed to delete NIC"); + removed_nic_ids.insert(id); + } + } + + // Check that we removed at least one, then prune them from our list of + // expected IPs. + assert!(!removed_nic_ids.is_empty()); + service_nics.retain(|nic| !removed_nic_ids.contains(&nic.id())); + + // Ensure we see them all remaining IPs. + let nics = read_all_service_nics(&datastore, &opctx).await; + assert_eq!(nics, service_nics); + + db.cleanup().await.unwrap(); + logctx.cleanup_successful(); + } +} diff --git a/nexus/db-queries/src/db/queries/external_ip.rs b/nexus/db-queries/src/db/queries/external_ip.rs index 0502450121..3969c808f9 100644 --- a/nexus/db-queries/src/db/queries/external_ip.rs +++ b/nexus/db-queries/src/db/queries/external_ip.rs @@ -1345,7 +1345,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service1_id) + .external_ip_list_service(&context.opctx, service1_id) .await .expect("Failed to look up service external IPs"), Vec::new(), @@ -1354,7 +1354,7 @@ mod tests { let id1 = Uuid::new_v4(); let ip1 = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id1, &Name("service1-ip".parse().unwrap()), @@ -1372,7 +1372,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service1_id) + 
.external_ip_list_service(&context.opctx, service1_id) .await .expect("Failed to look up service external IPs"), vec![ip1], @@ -1383,7 +1383,7 @@ mod tests { let id2 = Uuid::new_v4(); let ip2 = context .db_datastore - .allocate_service_snat_ip(&context.opctx, id2, service2_id) + .external_ip_allocate_service_snat(&context.opctx, id2, service2_id) .await .expect("Failed to allocate service IP address"); assert!(ip2.is_service); @@ -1395,7 +1395,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service2_id) + .external_ip_list_service(&context.opctx, service2_id) .await .expect("Failed to look up service external IPs"), vec![ip2], @@ -1406,7 +1406,7 @@ mod tests { let id3 = Uuid::new_v4(); let ip3 = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id3, &Name("service3-ip".parse().unwrap()), @@ -1424,7 +1424,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service3_id) + .external_ip_list_service(&context.opctx, service3_id) .await .expect("Failed to look up service external IPs"), vec![ip3], @@ -1435,7 +1435,7 @@ mod tests { let id3 = Uuid::new_v4(); let err = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id3, &Name("service3-ip".parse().unwrap()), @@ -1457,7 +1457,7 @@ mod tests { let id4 = Uuid::new_v4(); let ip4 = context .db_datastore - .allocate_service_snat_ip(&context.opctx, id4, service4_id) + .external_ip_allocate_service_snat(&context.opctx, id4, service4_id) .await .expect("Failed to allocate service IP address"); assert!(ip4.is_service); @@ -1469,7 +1469,7 @@ mod tests { assert_eq!( context .db_datastore - .service_lookup_external_ips(&context.opctx, service4_id) + .external_ip_list_service(&context.opctx, service4_id) .await .expect("Failed to look up service external IPs"), vec![ip4], @@ -1498,7 +1498,7 @@ mod tests { let id = Uuid::new_v4(); let ip = context 
.db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1517,7 +1517,7 @@ mod tests { // Try allocating the same service IP again. let ip_again = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1535,7 +1535,7 @@ mod tests { // different UUID. let err = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, Uuid::new_v4(), &Name("service-ip".parse().unwrap()), @@ -1554,7 +1554,7 @@ mod tests { // different input address. let err = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1573,7 +1573,7 @@ mod tests { // different port range. let err = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, id, service_id, @@ -1592,7 +1592,7 @@ mod tests { let snat_id = Uuid::new_v4(); let snat_ip = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, snat_id, snat_service_id, @@ -1611,7 +1611,7 @@ mod tests { // Try allocating the same service IP again. let snat_ip_again = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, snat_id, snat_service_id, @@ -1630,7 +1630,7 @@ mod tests { // different port range. 
let err = context .db_datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( &context.opctx, snat_id, snat_service_id, @@ -1665,7 +1665,7 @@ mod tests { let id = Uuid::new_v4(); let err = context .db_datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1703,7 +1703,7 @@ mod tests { let id = Uuid::new_v4(); let ip = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1720,7 +1720,7 @@ mod tests { let ip_again = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1760,7 +1760,7 @@ mod tests { let id = Uuid::new_v4(); let ip = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), @@ -1777,7 +1777,7 @@ mod tests { let ip_again = context .db_datastore - .allocate_service_ip( + .external_ip_allocate_service( &context.opctx, id, &Name("service-ip".parse().unwrap()), diff --git a/nexus/db-queries/src/db/queries/network_interface.rs b/nexus/db-queries/src/db/queries/network_interface.rs index c0fc18aca1..afd6af1140 100644 --- a/nexus/db-queries/src/db/queries/network_interface.rs +++ b/nexus/db-queries/src/db/queries/network_interface.rs @@ -2300,7 +2300,7 @@ mod tests { .service_create_network_interface_raw(&context.opctx, interface) .await .expect("Failed to insert interface"); - assert_eq!(inserted_interface.slot, i16::from(slot)); + assert_eq!(*inserted_interface.slot, slot); } context.success().await; @@ -2413,7 +2413,7 @@ mod tests { .service_create_network_interface_raw(&context.opctx, interface) .await .expect("Failed to insert interface"); - assert_eq!(inserted_interface.slot, 0); + assert_eq!(*inserted_interface.slot, 0); // Inserting an interface with the same slot on the 
same service should let new_interface = IncompleteNetworkInterface::new_service( @@ -2776,8 +2776,7 @@ mod tests { ) .await .expect("Should be able to insert up to 8 interfaces"); - let actual_slot = usize::try_from(inserted_interface.slot) - .expect("Bad slot index"); + let actual_slot = usize::from(*inserted_interface.slot); assert_eq!( slot, actual_slot, "Failed to allocate next available interface slot" diff --git a/nexus/reconfigurator/execution/src/dns.rs b/nexus/reconfigurator/execution/src/dns.rs index fc95414103..420a1ec84f 100644 --- a/nexus/reconfigurator/execution/src/dns.rs +++ b/nexus/reconfigurator/execution/src/dns.rs @@ -509,6 +509,7 @@ mod test { use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneType; + use nexus_types::deployment::PlanningInput; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; @@ -864,13 +865,13 @@ mod test { async fn test_blueprint_external_dns_basic() { static TEST_NAME: &str = "test_blueprint_external_dns_basic"; let logctx = test_setup_log(TEST_NAME); - let (collection, policy) = example(&logctx.log, TEST_NAME, 5); + let (collection, input) = example(&logctx.log, TEST_NAME, 5); let initial_external_dns_generation = Generation::new(); let blueprint = BlueprintBuilder::build_initial_from_collection( &collection, Generation::new(), initial_external_dns_generation, - &policy, + &input.policy, "test suite", ) .expect("failed to generate initial blueprint"); @@ -1215,6 +1216,13 @@ mod test { policy .service_ip_pool_ranges .push(IpRange::from(IpAddr::V4(Ipv4Addr::LOCALHOST))); + let planning_input = PlanningInput { + policy, + // These are not used because we're not actually going through the + // planner. 
+ service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), + }; let mut builder = BlueprintBuilder::new_based_on( &log, &blueprint, @@ -1224,7 +1232,7 @@ mod test { Generation::from( u32::try_from(dns_latest_external.generation).unwrap(), ), - &policy, + &planning_input, "test suite", ) .unwrap(); diff --git a/nexus/reconfigurator/execution/src/resource_allocation.rs b/nexus/reconfigurator/execution/src/resource_allocation.rs index 92262ce133..2803482058 100644 --- a/nexus/reconfigurator/execution/src/resource_allocation.rs +++ b/nexus/reconfigurator/execution/src/resource_allocation.rs @@ -100,7 +100,7 @@ impl<'a> ResourceAllocator<'a> { let allocated_ips = self .datastore - .service_lookup_external_ips(self.opctx, zone_id) + .external_ip_list_service(self.opctx, zone_id) .await .with_context(|| { format!( @@ -186,7 +186,7 @@ impl<'a> ResourceAllocator<'a> { for allocated_nic in &allocated_nics { if allocated_nic.ip.ip() == nic.ip && *allocated_nic.mac == nic.mac - && allocated_nic.slot == i16::from(nic.slot) + && *allocated_nic.slot == nic.slot && allocated_nic.primary == nic.primary { info!( @@ -258,7 +258,7 @@ impl<'a> ResourceAllocator<'a> { let ip_id = Uuid::new_v4(); let description = zone_type; self.datastore - .allocate_explicit_service_ip( + .external_ip_allocate_service_explicit( self.opctx, ip_id, ip_name, @@ -313,7 +313,7 @@ impl<'a> ResourceAllocator<'a> { let ip_id = Uuid::new_v4(); self.datastore - .allocate_explicit_service_snat_ip( + .external_ip_allocate_service_explicit_snat( self.opctx, ip_id, service_id, @@ -403,14 +403,12 @@ impl<'a> ResourceAllocator<'a> { // We do not check `nic.vni`, because it's not stored in the // database. (All services are given the constant vni // `Vni::SERVICES_VNI`.) 
- if created_nic.primary != nic.primary - || created_nic.slot != i16::from(nic.slot) - { + if created_nic.primary != nic.primary || *created_nic.slot != nic.slot { warn!( self.opctx.log, "unexpected property on allocated NIC"; "db_primary" => created_nic.primary, "expected_primary" => nic.primary, - "db_slot" => created_nic.slot, + "db_slot" => *created_nic.slot, "expected_slot" => nic.slot, ); @@ -671,7 +669,7 @@ mod tests { // Check that the external IP records were created. let db_nexus_ips = datastore - .service_lookup_external_ips(&opctx, nexus_id) + .external_ip_list_service(&opctx, nexus_id) .await .expect("failed to get external IPs"); assert_eq!(db_nexus_ips.len(), 1); @@ -682,7 +680,7 @@ mod tests { assert_eq!(db_nexus_ips[0].last_port, SqlU16(65535)); let db_dns_ips = datastore - .service_lookup_external_ips(&opctx, dns_id) + .external_ip_list_service(&opctx, dns_id) .await .expect("failed to get external IPs"); assert_eq!(db_dns_ips.len(), 1); @@ -693,7 +691,7 @@ mod tests { assert_eq!(db_dns_ips[0].last_port, SqlU16(65535)); let db_ntp_ips = datastore - .service_lookup_external_ips(&opctx, ntp_id) + .external_ip_list_service(&opctx, ntp_id) .await .expect("failed to get external IPs"); assert_eq!(db_ntp_ips.len(), 1); @@ -715,7 +713,7 @@ mod tests { assert_eq!(db_nexus_nics[0].subnet_id, NEXUS_VPC_SUBNET.id()); assert_eq!(*db_nexus_nics[0].mac, nexus_nic.mac); assert_eq!(db_nexus_nics[0].ip, nexus_nic.ip.into()); - assert_eq!(db_nexus_nics[0].slot, i16::from(nexus_nic.slot)); + assert_eq!(*db_nexus_nics[0].slot, nexus_nic.slot); assert_eq!(db_nexus_nics[0].primary, nexus_nic.primary); let db_dns_nics = datastore @@ -729,7 +727,7 @@ mod tests { assert_eq!(db_dns_nics[0].subnet_id, DNS_VPC_SUBNET.id()); assert_eq!(*db_dns_nics[0].mac, dns_nic.mac); assert_eq!(db_dns_nics[0].ip, dns_nic.ip.into()); - assert_eq!(db_dns_nics[0].slot, i16::from(dns_nic.slot)); + assert_eq!(*db_dns_nics[0].slot, dns_nic.slot); assert_eq!(db_dns_nics[0].primary, 
dns_nic.primary); let db_ntp_nics = datastore @@ -743,7 +741,7 @@ mod tests { assert_eq!(db_ntp_nics[0].subnet_id, NTP_VPC_SUBNET.id()); assert_eq!(*db_ntp_nics[0].mac, ntp_nic.mac); assert_eq!(db_ntp_nics[0].ip, ntp_nic.ip.into()); - assert_eq!(db_ntp_nics[0].slot, i16::from(ntp_nic.slot)); + assert_eq!(*db_ntp_nics[0].slot, ntp_nic.slot); assert_eq!(db_ntp_nics[0].primary, ntp_nic.primary); // We should be able to run the function again with the same inputs, and @@ -755,21 +753,21 @@ mod tests { assert_eq!( db_nexus_ips, datastore - .service_lookup_external_ips(&opctx, nexus_id) + .external_ip_list_service(&opctx, nexus_id) .await .expect("failed to get external IPs") ); assert_eq!( db_dns_ips, datastore - .service_lookup_external_ips(&opctx, dns_id) + .external_ip_list_service(&opctx, dns_id) .await .expect("failed to get external IPs") ); assert_eq!( db_ntp_ips, datastore - .service_lookup_external_ips(&opctx, ntp_id) + .external_ip_list_service(&opctx, ntp_id) .await .expect("failed to get external IPs") ); diff --git a/nexus/reconfigurator/planning/Cargo.toml b/nexus/reconfigurator/planning/Cargo.toml index cb55d9aa7c..f990a92157 100644 --- a/nexus/reconfigurator/planning/Cargo.toml +++ b/nexus/reconfigurator/planning/Cargo.toml @@ -15,6 +15,7 @@ nexus-config.workspace = true nexus-inventory.workspace = true nexus-types.workspace = true omicron-common.workspace = true +omicron-uuid-kinds.workspace = true rand.workspace = true sled-agent-client.workspace = true slog.workspace = true diff --git a/nexus/reconfigurator/planning/src/blueprint_builder.rs b/nexus/reconfigurator/planning/src/blueprint_builder.rs index dc0f1e501c..827693beb1 100644 --- a/nexus/reconfigurator/planning/src/blueprint_builder.rs +++ b/nexus/reconfigurator/planning/src/blueprint_builder.rs @@ -19,6 +19,7 @@ use nexus_types::deployment::BlueprintZonesConfig; use nexus_types::deployment::OmicronZoneConfig; use nexus_types::deployment::OmicronZoneDataset; use 
nexus_types::deployment::OmicronZoneType; +use nexus_types::deployment::PlanningInput; use nexus_types::deployment::Policy; use nexus_types::deployment::SledResources; use nexus_types::deployment::ZpoolName; @@ -114,7 +115,7 @@ pub struct BlueprintBuilder<'a> { external_dns_version: Generation, // These fields are used to allocate resources from sleds. - policy: &'a Policy, + input: &'a PlanningInput, sled_ip_allocators: BTreeMap, // These fields will become part of the final blueprint. See the @@ -241,7 +242,7 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint: &'a Blueprint, internal_dns_version: Generation, external_dns_version: Generation, - policy: &'a Policy, + input: &'a PlanningInput, creator: &str, ) -> anyhow::Result> { let log = log.new(o!( @@ -341,7 +342,8 @@ impl<'a> BlueprintBuilder<'a> { .filter(move |ip| !existing_nexus_v6_ips.contains(ip)), ); let available_external_ips = Box::new( - policy + input + .policy .service_ip_pool_ranges .iter() .flat_map(|r| r.iter()) @@ -356,7 +358,7 @@ impl<'a> BlueprintBuilder<'a> { parent_blueprint, internal_dns_version, external_dns_version, - policy, + input, sled_ip_allocators: BTreeMap::new(), zones: BlueprintZonesBuilder::new(parent_blueprint), creator: creator.to_owned(), @@ -373,7 +375,7 @@ impl<'a> BlueprintBuilder<'a> { pub fn build(mut self) -> Blueprint { // Collect the Omicron zones config for each in-service sled. 
let blueprint_zones = - self.zones.into_zones_map(self.policy.sleds.keys().copied()); + self.zones.into_zones_map(self.input.policy.sleds.keys().copied()); Blueprint { id: self.rng.blueprint_rng.next(), blueprint_zones, @@ -715,7 +717,7 @@ impl<'a> BlueprintBuilder<'a> { } fn sled_resources(&self, sled_id: Uuid) -> Result<&SledResources, Error> { - self.policy.sleds.get(&sled_id).ok_or_else(|| { + self.input.policy.sleds.get(&sled_id).ok_or_else(|| { Error::Planner(anyhow!( "attempted to use sled that is not in service: {}", sled_id @@ -891,14 +893,14 @@ pub mod test { // describes no changes. static TEST_NAME: &str = "blueprint_builder_test_initial"; let logctx = test_setup_log(TEST_NAME); - let (collection, policy) = + let (collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); let blueprint_initial = BlueprintBuilder::build_initial_from_collection_seeded( &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "the_test", TEST_NAME, ) @@ -929,7 +931,7 @@ pub mod test { &blueprint_initial, Generation::new(), Generation::new(), - &policy, + &input, "test_basic", ) .expect("failed to create builder"); @@ -961,7 +963,7 @@ pub mod test { blueprint1, Generation::new(), Generation::new(), - &example.policy, + &example.input, "test_basic", ) .expect("failed to create builder"); @@ -969,7 +971,7 @@ pub mod test { // The example blueprint should have internal NTP zones on all the // existing sleds, plus Crucible zones on all pools. So if we ensure // all these zones exist, we should see no change. 
- for (sled_id, sled_resources) in &example.policy.sleds { + for (sled_id, sled_resources) in &example.input.policy.sleds { builder.sled_ensure_zone_ntp(*sled_id).unwrap(); for pool_name in &sled_resources.zpools { builder @@ -994,17 +996,22 @@ pub mod test { let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); let policy = example.system.to_policy().unwrap(); + let input = PlanningInput { + policy, + service_external_ips: example.input.service_external_ips, + service_nics: example.input.service_nics, + }; let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &blueprint2, Generation::new(), Generation::new(), - &policy, + &input, "test_basic", ) .expect("failed to create builder"); builder.sled_ensure_zone_ntp(new_sled_id).unwrap(); - let new_sled_resources = policy.sleds.get(&new_sled_id).unwrap(); + let new_sled_resources = input.policy.sleds.get(&new_sled_id).unwrap(); for pool_name in &new_sled_resources.zpools { builder .sled_ensure_zone_crucible(new_sled_id, pool_name.clone()) @@ -1078,7 +1085,7 @@ pub mod test { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_with_no_existing_nexus_zones"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We don't care about the DNS versions here. 
@@ -1099,7 +1106,7 @@ pub mod test { &collection, internal_dns_version, external_dns_version, - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1110,7 +1117,7 @@ pub mod test { &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1139,7 +1146,7 @@ pub mod test { fn test_add_nexus_error_cases() { static TEST_NAME: &str = "blueprint_builder_test_add_nexus_error_cases"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We don't care about the DNS versions here. @@ -1168,7 +1175,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1182,7 +1189,7 @@ pub mod test { &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1202,7 +1209,7 @@ pub mod test { &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1217,7 +1224,7 @@ pub mod test { // Replace the policy's external service IP pool ranges with ranges // that are already in use by existing zones. Attempting to add a // Nexus with no remaining external IPs should fail. 
- let mut policy = policy.clone(); + let mut input = input.clone(); let mut used_ip_ranges = Vec::new(); for (_, z) in parent.all_omicron_zones() { if let Some(ip) = z @@ -1229,14 +1236,14 @@ pub mod test { } } assert!(!used_ip_ranges.is_empty()); - policy.service_ip_pool_ranges = used_ip_ranges; + input.policy.service_ip_pool_ranges = used_ip_ranges; let mut builder = BlueprintBuilder::new_based_on( &logctx.log, &parent, internal_dns_version, external_dns_version, - &policy, + &input, "test", ) .expect("failed to create builder"); @@ -1267,7 +1274,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_zones_with_same_external_ip"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1299,7 +1306,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1310,7 +1317,7 @@ pub mod test { &parent, Generation::new(), Generation::new(), - &policy, + &input, "test", ) { Ok(_) => panic!("unexpected success"), @@ -1329,7 +1336,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_nexus_zones_with_same_nic_ip"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = + let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1359,7 +1366,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1370,7 +1377,7 @@ pub mod test { &parent, Generation::new(), Generation::new(), - &policy, + &input, "test", ) { Ok(_) => panic!("unexpected success"), @@ -1389,7 +1396,7 @@ pub mod test { "blueprint_builder_test_invalid_parent_blueprint_\ two_zones_with_same_vnic_mac"; let logctx = test_setup_log(TEST_NAME); - let (mut collection, policy) = 
+ let (mut collection, input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // We should fail if the parent blueprint claims to contain two @@ -1419,7 +1426,7 @@ pub mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "test", TEST_NAME, ) @@ -1430,7 +1437,7 @@ pub mod test { &parent, Generation::new(), Generation::new(), - &policy, + &input, "test", ) { Ok(_) => panic!("unexpected success"), diff --git a/nexus/reconfigurator/planning/src/example.rs b/nexus/reconfigurator/planning/src/example.rs index a18e3b71cf..563b3662bf 100644 --- a/nexus/reconfigurator/planning/src/example.rs +++ b/nexus/reconfigurator/planning/src/example.rs @@ -9,15 +9,22 @@ use crate::system::SledBuilder; use crate::system::SystemDescription; use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintZoneFilter; -use nexus_types::deployment::Policy; +use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::TypedUuid; use sled_agent_client::types::OmicronZonesConfig; +use std::collections::BTreeMap; use typed_rng::UuidRng; +use uuid::Uuid; pub struct ExampleSystem { pub system: SystemDescription, - pub policy: Policy, + pub input: PlanningInput, pub collection: Collection, pub blueprint: Blueprint, // If we add more types of RNGs than just sleds here, we'll need to @@ -46,6 +53,11 @@ impl ExampleSystem { let policy = system.to_policy().expect("failed to make policy"); let mut inventory_builder = system.to_collection_builder().expect("failed to build collection"); + let mut input = PlanningInput { + policy, + service_external_ips: BTreeMap::new(), + service_nics: BTreeMap::new(), + }; // For each sled, have it report 0 zones in the initial inventory. 
// This will enable us to build a blueprint from the initial @@ -69,7 +81,7 @@ impl ExampleSystem { &empty_zone_inventory, Generation::new(), Generation::new(), - &policy, + &input.policy, "test suite", (test_name, "ExampleSystem initial"), ) @@ -77,16 +89,16 @@ impl ExampleSystem { // Now make a blueprint and collection with some zones on each sled. let mut builder = BlueprintBuilder::new_based_on( - &log, + log, &initial_blueprint, Generation::new(), Generation::new(), - &policy, + &input, "test suite", ) .unwrap(); builder.set_rng_seed((test_name, "ExampleSystem make_zones")); - for (sled_id, sled_resources) in &policy.sleds { + for (sled_id, sled_resources) in &input.policy.sleds { let _ = builder.sled_ensure_zone_ntp(*sled_id).unwrap(); let _ = builder .sled_ensure_zone_multiple_nexus_with_config( @@ -112,6 +124,28 @@ impl ExampleSystem { let Some(zones) = blueprint.blueprint_zones.get(&sled_id) else { continue; }; + for zone in zones.zones.iter().map(|z| &z.config) { + let service_id = + TypedUuid::::from_untyped_uuid(zone.id); + if let Ok(Some(ip)) = zone.zone_type.external_ip() { + input.service_external_ips.insert( + service_id, + ExternalIp { id: Uuid::new_v4(), ip: ip.into() }, + ); + } + if let Some(nic) = zone.zone_type.service_vnic() { + input.service_nics.insert( + service_id, + ServiceNetworkInterface { + id: nic.id, + mac: nic.mac, + ip: nic.ip.into(), + slot: nic.slot, + primary: nic.primary, + }, + ); + } + } builder .found_sled_omicron_zones( "fake sled agent", @@ -125,7 +159,7 @@ impl ExampleSystem { ExampleSystem { system, - policy, + input, collection: builder.build(), blueprint, sled_rng, @@ -133,7 +167,7 @@ impl ExampleSystem { } } -/// Returns a collection and policy describing a pretty simple system. +/// Returns a collection and planning input describing a pretty simple system. /// /// The test name is used as the RNG seed. 
/// @@ -144,7 +178,7 @@ pub fn example( log: &slog::Logger, test_name: &str, nsleds: usize, -) -> (Collection, Policy) { +) -> (Collection, PlanningInput) { let example = ExampleSystem::new(log, test_name, nsleds); - (example.collection, example.policy) + (example.collection, example.input) } diff --git a/nexus/reconfigurator/planning/src/planner.rs b/nexus/reconfigurator/planning/src/planner.rs index ce5660e7f6..280ac61ede 100644 --- a/nexus/reconfigurator/planning/src/planner.rs +++ b/nexus/reconfigurator/planning/src/planner.rs @@ -11,7 +11,7 @@ use crate::blueprint_builder::Ensure; use crate::blueprint_builder::EnsureMultiple; use crate::blueprint_builder::Error; use nexus_types::deployment::Blueprint; -use nexus_types::deployment::Policy; +use nexus_types::deployment::PlanningInput; use nexus_types::external_api::views::SledState; use nexus_types::inventory::Collection; use omicron_common::api::external::Generation; @@ -23,7 +23,7 @@ use uuid::Uuid; pub struct Planner<'a> { log: Logger, - policy: &'a Policy, + input: &'a PlanningInput, blueprint: BlueprintBuilder<'a>, // latest inventory collection // @@ -43,7 +43,7 @@ impl<'a> Planner<'a> { parent_blueprint: &'a Blueprint, internal_dns_version: Generation, external_dns_version: Generation, - policy: &'a Policy, + input: &'a PlanningInput, creator: &str, // NOTE: Right now, we just assume that this is the latest inventory // collection. See the comment on the corresponding field in `Planner`. @@ -54,10 +54,10 @@ impl<'a> Planner<'a> { parent_blueprint, internal_dns_version, external_dns_version, - policy, + input, creator, )?; - Ok(Planner { log, policy, blueprint, inventory }) + Ok(Planner { log, input, blueprint, inventory }) } /// Within tests, set a seeded RNG for deterministic results. @@ -98,7 +98,7 @@ impl<'a> Planner<'a> { // is fine. 
let mut sleds_ineligible_for_services = BTreeSet::new(); - for (sled_id, sled_info) in &self.policy.sleds { + for (sled_id, sled_info) in &self.input.policy.sleds { // Decommissioned sleds don't get any services. (This is an // explicit match so that when more states are added, this fails to // compile.) @@ -200,10 +200,12 @@ impl<'a> Planner<'a> { // sleds so we can avoid any non-provisionable sleds under the // assumption that there is something amiss with them. sleds_ineligible_for_services.extend( - self.policy.sleds.iter().filter_map(|(sled_id, sled_info)| { - (!sled_info.is_eligible_for_discretionary_services()) - .then_some(*sled_id) - }), + self.input.policy.sleds.iter().filter_map( + |(sled_id, sled_info)| { + (!sled_info.is_eligible_for_discretionary_services()) + .then_some(*sled_id) + }, + ), ); self.ensure_correct_number_of_nexus_zones( @@ -222,7 +224,7 @@ impl<'a> Planner<'a> { let mut num_total_nexus = 0; let mut sleds_by_num_nexus: BTreeMap> = BTreeMap::new(); - for &sled_id in self.policy.sleds.keys() { + for &sled_id in self.input.policy.sleds.keys() { let num_nexus = self.blueprint.sled_num_nexus_zones(sled_id); num_total_nexus += num_nexus; @@ -237,12 +239,15 @@ impl<'a> Planner<'a> { // TODO-correctness What should we do if we have _too many_ Nexus // instances? For now, just log it the number of zones any time we have // at least the minimum number. 
- let nexus_to_add = - self.policy.target_nexus_zone_count.saturating_sub(num_total_nexus); + let nexus_to_add = self + .input + .policy + .target_nexus_zone_count + .saturating_sub(num_total_nexus); if nexus_to_add == 0 { info!( self.log, "sufficient Nexus zones exist in plan"; - "desired_count" => self.policy.target_nexus_zone_count, + "desired_count" => self.input.policy.target_nexus_zone_count, "current_count" => num_total_nexus, ); return Ok(()); @@ -345,6 +350,7 @@ mod test { use nexus_inventory::now_db_precision; use nexus_types::deployment::BlueprintZoneDisposition; use nexus_types::deployment::BlueprintZoneFilter; + use nexus_types::deployment::PlanningInput; use nexus_types::external_api::views::SledPolicy; use nexus_types::external_api::views::SledProvisionPolicy; use nexus_types::external_api::views::SledState; @@ -374,7 +380,7 @@ mod test { &example.collection, internal_dns_version, external_dns_version, - &example.policy, + &example.input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -389,7 +395,7 @@ mod test { &blueprint1, internal_dns_version, external_dns_version, - &example.policy, + &example.input, "no-op?", &example.collection, ) @@ -410,6 +416,11 @@ mod test { let _ = example.system.sled(SledBuilder::new().id(new_sled_id)).unwrap(); let policy = example.system.to_policy().unwrap(); + let input = PlanningInput { + policy, + service_external_ips: example.input.service_external_ips, + service_nics: example.input.service_nics, + }; // Check that the first step is to add an NTP zone let blueprint3 = Planner::new_based_on( @@ -417,7 +428,7 @@ mod test { &blueprint2, internal_dns_version, external_dns_version, - &policy, + &input, "test: add NTP?", &example.collection, ) @@ -459,7 +470,7 @@ mod test { &blueprint3, internal_dns_version, external_dns_version, - &policy, + &input, "test: add nothing more", &example.collection, ) @@ -501,7 +512,7 @@ mod test { &blueprint3, internal_dns_version, external_dns_version, - &policy, + &input, "test: add Crucible 
zones?", &collection, ) @@ -543,7 +554,7 @@ mod test { &blueprint5, internal_dns_version, external_dns_version, - &policy, + &input, "test: no-op?", &collection, ) @@ -575,21 +586,21 @@ mod test { // Use our example inventory collection as a starting point, but strip // it down to just one sled. - let (sled_id, collection, mut policy) = { - let (mut collection, mut policy) = + let (sled_id, collection, mut input) = { + let (mut collection, mut input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Pick one sled ID to keep and remove the rest. let keep_sled_id = - policy.sleds.keys().next().copied().expect("no sleds"); - policy.sleds.retain(|&k, _v| keep_sled_id == k); + input.policy.sleds.keys().next().copied().expect("no sleds"); + input.policy.sleds.retain(|&k, _v| keep_sled_id == k); collection.sled_agents.retain(|&k, _v| keep_sled_id == k); collection.omicron_zones.retain(|&k, _v| keep_sled_id == k); assert_eq!(collection.sled_agents.len(), 1); assert_eq!(collection.omicron_zones.len(), 1); - (keep_sled_id, collection, policy) + (keep_sled_id, collection, input) }; // Build the initial blueprint. @@ -598,7 +609,7 @@ mod test { &collection, internal_dns_version, external_dns_version, - &policy, + &input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -621,13 +632,13 @@ mod test { // Now run the planner. It should add additional Nexus instances to the // one sled we have. 
- policy.target_nexus_zone_count = 5; + input.policy.target_nexus_zone_count = 5; let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, internal_dns_version, external_dns_version, - &policy, + &input, "test_blueprint2", &collection, ) @@ -647,7 +658,7 @@ mod test { assert_eq!(sled_changes.zones_removed().len(), 0); assert_eq!(sled_changes.zones_modified().count(), 0); let zones = sled_changes.zones_added().collect::>(); - assert_eq!(zones.len(), policy.target_nexus_zone_count - 1); + assert_eq!(zones.len(), input.policy.target_nexus_zone_count - 1); for zone in &zones { if !zone.config.zone_type.is_nexus() { panic!("unexpectedly added a non-Nexus zone: {zone:?}"); @@ -666,7 +677,7 @@ mod test { let logctx = test_setup_log(TEST_NAME); // Use our example inventory collection as a starting point. - let (collection, mut policy) = + let (collection, mut input) = example(&logctx.log, TEST_NAME, DEFAULT_N_SLEDS); // Build the initial blueprint. @@ -675,7 +686,7 @@ mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -695,13 +706,13 @@ mod test { } // Now run the planner with a high number of target Nexus zones. - policy.target_nexus_zone_count = 14; + input.policy.target_nexus_zone_count = 14; let blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, Generation::new(), Generation::new(), - &policy, + &input, "test_blueprint2", &collection, ) @@ -758,7 +769,7 @@ mod test { // and decommissioned sleds. (When we add more kinds of // non-provisionable states in the future, we'll have to add more // sleds.) - let (collection, mut policy) = example(&logctx.log, TEST_NAME, 5); + let (collection, mut input) = example(&logctx.log, TEST_NAME, 5); // Build the initial blueprint. 
let blueprint1 = @@ -766,7 +777,7 @@ mod test { &collection, Generation::new(), Generation::new(), - &policy, + &input.policy, "the_test", (TEST_NAME, "bp1"), ) @@ -787,7 +798,7 @@ mod test { // Arbitrarily choose some of the sleds and mark them non-provisionable // in various ways. - let mut sleds_iter = policy.sleds.iter_mut(); + let mut sleds_iter = input.policy.sleds.iter_mut(); let nonprovisionable_sled_id = { let (sled_id, resources) = sleds_iter.next().expect("no sleds"); @@ -817,13 +828,13 @@ mod test { // // When the planner gets smarter about removing zones from expunged // and/or removed sleds, we'll have to adjust this number. - policy.target_nexus_zone_count = 16; + input.policy.target_nexus_zone_count = 16; let mut blueprint2 = Planner::new_based_on( logctx.log.clone(), &blueprint1, Generation::new(), Generation::new(), - &policy, + &input, "test_blueprint2", &collection, ) diff --git a/nexus/src/app/deployment.rs b/nexus/src/app/deployment.rs index b38508d74c..9e926b202a 100644 --- a/nexus/src/app/deployment.rs +++ b/nexus/src/app/deployment.rs @@ -14,7 +14,9 @@ use nexus_types::deployment::Blueprint; use nexus_types::deployment::BlueprintMetadata; use nexus_types::deployment::BlueprintTarget; use nexus_types::deployment::BlueprintTargetSet; -use nexus_types::deployment::Policy; +use nexus_types::deployment::ExternalIp; +use nexus_types::deployment::PlanningInput; +use nexus_types::deployment::ServiceNetworkInterface; use nexus_types::inventory::Collection; use omicron_common::address::NEXUS_REDUNDANCY; use omicron_common::api::external::CreateResult; @@ -26,12 +28,15 @@ use omicron_common::api::external::InternalContext; use omicron_common::api::external::ListResultVec; use omicron_common::api::external::LookupResult; use omicron_common::api::external::LookupType; +use omicron_uuid_kinds::GenericUuid; +use omicron_uuid_kinds::OmicronZoneKind; +use omicron_uuid_kinds::TypedUuid; use slog_error_chain::InlineErrorChain; use uuid::Uuid; /// Common 
structure for collecting information that the planner needs struct PlanningContext { - policy: Policy, + planning_input: PlanningInput, creator: String, inventory: Option<Collection>, internal_dns_version: Generation, @@ -151,6 +156,49 @@ impl super::Nexus { NEXUS_REDUNDANCY, )?; + let service_external_ips = datastore + .external_ip_list_service_all_batched(opctx) + .await? + .into_iter() + .filter_map(|external_ip| { + if !external_ip.is_service { + error!( + opctx.log, + "non-service external IP returned by service IP query"; + "external-ip" => ?external_ip, + ); + return None; + } + let Some(service_id) = external_ip.parent_id else { + error!( + opctx.log, + "service external IP with no parent ID set"; + "external-ip" => ?external_ip, + ); + return None; + }; + Some(( + TypedUuid::<OmicronZoneKind>::from_untyped_uuid(service_id), + ExternalIp::from(external_ip), + )) + }) + .collect(); + let service_nics = datastore + .service_network_interfaces_all_list_batched(opctx) + .await? + .into_iter() + .map(|nic| { + ( + TypedUuid::<OmicronZoneKind>::from_untyped_uuid( + nic.service_id, + ), + ServiceNetworkInterface::from(nic), + ) + }) + .collect(); + let planning_input = + PlanningInput { policy, service_external_ips, service_nics }; + // The choice of which inventory collection to use here is not + // necessarily trivial. Inventory collections may be incomplete due to + // transient (or even persistent) errors. 
It's not yet clear what @@ -186,8 +234,8 @@ impl super::Nexus { )?; Ok(PlanningContext { + planning_input, creator, - policy, inventory, internal_dns_version: *internal_dns_version.version, external_dns_version: *external_dns_version.version, @@ -216,7 +264,7 @@ impl super::Nexus { &collection, planning_context.internal_dns_version, planning_context.external_dns_version, - &planning_context.policy, + &planning_context.planning_input.policy, &planning_context.creator, ) .map_err(|error| { @@ -252,7 +300,7 @@ impl super::Nexus { &parent_blueprint, planning_context.internal_dns_version, planning_context.external_dns_version, - &planning_context.policy, + &planning_context.planning_input, &planning_context.creator, &inventory, ) diff --git a/nexus/types/Cargo.toml b/nexus/types/Cargo.toml index aff45d07de..68b1444cc1 100644 --- a/nexus/types/Cargo.toml +++ b/nexus/types/Cargo.toml @@ -10,6 +10,7 @@ chrono.workspace = true base64.workspace = true futures.workspace = true humantime.workspace = true +ipnetwork.workspace = true omicron-uuid-kinds.workspace = true openssl.workspace = true parse-display.workspace = true @@ -21,6 +22,7 @@ steno.workspace = true strum.workspace = true tabled.workspace = true thiserror.workspace = true +newtype-uuid.workspace = true uuid.workspace = true api_identity.workspace = true diff --git a/nexus/types/src/deployment.rs b/nexus/types/src/deployment.rs index 4c4f3823c6..bed66adaca 100644 --- a/nexus/types/src/deployment.rs +++ b/nexus/types/src/deployment.rs @@ -22,10 +22,14 @@ pub use crate::inventory::OmicronZoneType; pub use crate::inventory::OmicronZonesConfig; pub use crate::inventory::SourceNatConfig; pub use crate::inventory::ZpoolName; +use ipnetwork::IpNetwork; +use newtype_uuid::TypedUuid; use omicron_common::address::IpRange; use omicron_common::address::Ipv6Subnet; use omicron_common::address::SLED_PREFIX; use omicron_common::api::external::Generation; +use omicron_common::api::external::MacAddr; +use 
omicron_uuid_kinds::OmicronZoneKind; use schemars::JsonSchema; use serde::Deserialize; use serde::Serialize; @@ -101,6 +105,48 @@ impl SledResources { } } +/// Policy and database inputs to the Reconfigurator planner +/// +/// The primary inputs to the planner are the parent (either a parent blueprint +/// or an inventory collection) and this structure. This type holds the +/// fleet-wide policy as well as any additional information fetched from CRDB +/// that the planner needs to make decisions. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct PlanningInput { + /// fleet-wide policy + pub policy: Policy, + + /// external IPs allocated to services + pub service_external_ips: BTreeMap<TypedUuid<OmicronZoneKind>, ExternalIp>, + + /// vNICs allocated to services + pub service_nics: + BTreeMap<TypedUuid<OmicronZoneKind>, ServiceNetworkInterface>, +} + +/// External IP allocated to a service +/// +/// This is a slimmer `nexus_db_model::ExternalIp` that only stores the fields +/// necessary for blueprint planning. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ExternalIp { + pub id: Uuid, + pub ip: IpNetwork, +} + +/// Network interface allocated to a service +/// +/// This is a slimmer `nexus_db_model::ServiceNetworkInterface` that only stores +/// the fields necessary for blueprint planning. +#[derive(Debug, Clone, Serialize, Deserialize)] +pub struct ServiceNetworkInterface { + pub id: Uuid, + pub mac: MacAddr, + pub ip: IpNetwork, + pub slot: u8, + pub primary: bool, +} + /// Describes a complete set of software and configuration for the system // Blueprints are a fundamental part of how the system modifies itself. Each // blueprint completely describes all of the software and configuration diff --git a/uuid-kinds/src/lib.rs b/uuid-kinds/src/lib.rs index 7018485b59..17aa803d13 100644 --- a/uuid-kinds/src/lib.rs +++ b/uuid-kinds/src/lib.rs @@ -48,6 +48,7 @@ impl_typed_uuid_kind! 
{ DownstairsKind => "downstairs", DownstairsRegionKind => "downstairs_region", LoopbackAddressKind => "loopback_address", + OmicronZoneKind => "service", TufRepoKind => "tuf_repo", UpstairsKind => "upstairs", UpstairsRepairKind => "upstairs_repair",