Skip to content

Commit

Permalink
[sled-agent] Rename switch zone functions to be more explicit (#6163)
Browse files Browse the repository at this point in the history
When working on the switch zone set up PRs, I was confused by some of
these function names thinking they may apply to other zones apart from
the switch zone. This very small PR changes a few names to help with
readability of the code.
  • Loading branch information
karencfv authored Jul 29, 2024
1 parent 24f7cc0 commit 945e75f
Showing 1 changed file with 57 additions and 51 deletions.
108 changes: 57 additions & 51 deletions sled-agent/src/services.rs
Original file line number Diff line number Diff line change
Expand Up @@ -121,6 +121,9 @@ use illumos_utils::zone::MockZones as Zones;
use illumos_utils::zone::Zones;

const IPV6_UNSPECIFIED: IpAddr = IpAddr::V6(Ipv6Addr::UNSPECIFIED);

const COCKROACH: &str = "/opt/oxide/cockroachdb/bin/cockroach";

pub const SWITCH_ZONE_BASEBOARD_FILE: &str = "/opt/oxide/baseboard.json";

#[derive(thiserror::Error, Debug, slog_error_chain::SlogInlineError)]
Expand Down Expand Up @@ -153,8 +156,8 @@ pub enum Error {
#[error("No U.2 devices found with a {ZONE_DATASET} mountpoint")]
U2NotFound,

#[error("Sled-local zone error: {0}")]
SledLocalZone(anyhow::Error),
#[error("Switch zone error: {0}")]
SwitchZone(anyhow::Error),

#[error("Failed to issue SMF command: {0}")]
SmfCommand(#[from] illumos_utils::smf_helper::Error),
Expand Down Expand Up @@ -206,8 +209,8 @@ pub enum Error {
#[error("Error contacting dpd: {0}")]
DpdError(#[from] DpdError<DpdTypes::Error>),

#[error("Failed to create Vnic in sled-local zone: {0}")]
SledLocalVnicCreation(illumos_utils::dladm::CreateVnicError),
#[error("Failed to create Vnic in the switch zone: {0}")]
SwitchZoneVnicCreation(illumos_utils::dladm::CreateVnicError),

#[error("Failed to add GZ addresses: {message}: {err}")]
GzAddress {
Expand Down Expand Up @@ -544,9 +547,9 @@ impl<'a> ZoneArgs<'a> {
}
}

/// If this is a sled-local (switch) zone, iterate over the services it's
/// If this is a switch zone, iterate over the services it's
/// supposed to be running
pub fn sled_local_services(
pub fn switch_zone_services(
&self,
) -> Box<dyn Iterator<Item = &'a SwitchService> + 'a> {
match self {
Expand Down Expand Up @@ -586,8 +589,8 @@ impl Task {
}
}

/// Describes the state of a sled-local zone.
enum SledLocalZone {
/// Describes the state of a switch zone.
enum SwitchZoneState {
// The zone is not currently running.
Disabled,
// The zone is still initializing - it may be awaiting the initialization
Expand Down Expand Up @@ -645,7 +648,7 @@ type ZoneMap = BTreeMap<String, OmicronZone>;
pub struct ServiceManagerInner {
log: Logger,
global_zone_bootstrap_link_local_address: Ipv6Addr,
switch_zone: Mutex<SledLocalZone>,
switch_zone: Mutex<SwitchZoneState>,
sled_mode: SledMode,
time_sync_config: TimeSyncConfig,
time_synced: AtomicBool,
Expand Down Expand Up @@ -727,7 +730,7 @@ impl ServiceManager {
.global_zone_bootstrap_link_local_ip,
// TODO(https://github.com/oxidecomputer/omicron/issues/725):
// Load the switch zone if it already exists?
switch_zone: Mutex::new(SledLocalZone::Disabled),
switch_zone: Mutex::new(SwitchZoneState::Disabled),
sled_mode,
time_sync_config,
time_synced: AtomicBool::new(false),
Expand Down Expand Up @@ -943,7 +946,7 @@ impl ServiceManager {
// physical devices need to be mapped into the zone when it is created.
fn devices_needed(zone_args: &ZoneArgs<'_>) -> Result<Vec<String>, Error> {
let mut devices = vec![];
for svc_details in zone_args.sled_local_services() {
for svc_details in zone_args.switch_zone_services() {
match svc_details {
SwitchService::Dendrite {
asic: DendriteAsic::TofinoAsic,
Expand Down Expand Up @@ -991,7 +994,7 @@ impl ServiceManager {
.inner
.bootstrap_vnic_allocator
.new_bootstrap()
.map_err(Error::SledLocalVnicCreation)?;
.map_err(Error::SwitchZoneVnicCreation)?;
Ok(Some((link, self.inner.switch_zone_bootstrap_address)))
} else {
Ok(None)
Expand Down Expand Up @@ -1027,7 +1030,7 @@ impl ServiceManager {
Error::Underlay(underlay::Error::SystemDetection(e))
})?;

for svc_details in zone_args.sled_local_services() {
for svc_details in zone_args.switch_zone_services() {
match &svc_details {
SwitchService::Tfport { pkt_source, asic: _ } => {
// The tfport service requires a MAC device to/from which
Expand Down Expand Up @@ -1251,7 +1254,7 @@ impl ServiceManager {
"dtrace_user".to_string(),
"dtrace_proc".to_string(),
];
for svc_details in zone_args.sled_local_services() {
for svc_details in zone_args.switch_zone_services() {
match svc_details {
SwitchService::Tfport { .. } => {
needed.push("sys_dl_config".to_string());
Expand Down Expand Up @@ -2526,7 +2529,7 @@ impl ServiceManager {
&device_names[0].clone(),
);
} else {
return Err(Error::SledLocalZone(
return Err(Error::SwitchZone(
anyhow::anyhow!(
"{dev_cnt} devices needed \
for tofino asic"
Expand Down Expand Up @@ -3073,7 +3076,7 @@ impl ServiceManager {
name: &str,
) -> Result<ZoneBundleMetadata, BundleError> {
// Search for the named zone.
if let SledLocalZone::Running { zone, .. } =
if let SwitchZoneState::Running { zone, .. } =
&*self.inner.switch_zone.lock().await
{
if zone.name() == name {
Expand Down Expand Up @@ -3483,7 +3486,7 @@ impl ServiceManager {
"Initializing CRDB Cluster - sending request to {host}"
);
if let Err(err) = zone.runtime.run_cmd(&[
"/opt/oxide/cockroachdb/bin/cockroach",
COCKROACH,
"init",
"--insecure",
"--host",
Expand All @@ -3499,7 +3502,7 @@ impl ServiceManager {
info!(log, "Formatting CRDB");
zone.runtime
.run_cmd(&[
"/opt/oxide/cockroachdb/bin/cockroach",
COCKROACH,
"sql",
"--insecure",
"--host",
Expand All @@ -3510,7 +3513,7 @@ impl ServiceManager {
.map_err(|err| Error::CockroachInit { err })?;
zone.runtime
.run_cmd(&[
"/opt/oxide/cockroachdb/bin/cockroach",
COCKROACH,
"sql",
"--insecure",
"--host",
Expand Down Expand Up @@ -3666,7 +3669,7 @@ impl ServiceManager {
// A pure gimlet sled should not be trying to activate a switch
// zone.
SledMode::Gimlet => {
return Err(Error::SledLocalZone(anyhow::anyhow!(
return Err(Error::SwitchZone(anyhow::anyhow!(
"attempted to activate switch zone on non-scrimlet sled"
)))
}
Expand Down Expand Up @@ -3749,7 +3752,7 @@ impl ServiceManager {
let request =
SwitchZoneConfig { id: Uuid::new_v4(), addresses, services };

self.ensure_zone(
self.ensure_switch_zone(
// request=
Some(request),
// filesystems=
Expand Down Expand Up @@ -3802,15 +3805,15 @@ impl ServiceManager {
let mut switch_zone = self.inner.switch_zone.lock().await;

let zone = match &mut *switch_zone {
SledLocalZone::Running { zone, .. } => zone,
SledLocalZone::Disabled => {
return Err(Error::SledLocalZone(anyhow!(
SwitchZoneState::Running { zone, .. } => zone,
SwitchZoneState::Disabled => {
return Err(Error::SwitchZone(anyhow!(
"Cannot configure switch zone uplinks: \
switch zone disabled"
)));
}
SledLocalZone::Initializing { .. } => {
return Err(Error::SledLocalZone(anyhow!(
SwitchZoneState::Initializing { .. } => {
return Err(Error::SwitchZone(anyhow!(
"Cannot configure switch zone uplinks: \
switch zone still initializing"
)));
Expand Down Expand Up @@ -3843,7 +3846,7 @@ impl ServiceManager {

/// Ensures that no switch zone is active.
pub async fn deactivate_switch(&self) -> Result<(), Error> {
self.ensure_zone(
self.ensure_switch_zone(
// request=
None,
// filesystems=
Expand All @@ -3854,32 +3857,32 @@ impl ServiceManager {
.await
}

// Forcefully initialize a sled-local zone.
// Forcefully initialize a sled-local switch zone.
//
// This is a helper function for "ensure_zone".
fn start_zone(
// This is a helper function for "ensure_switch_zone".
fn start_switch_zone(
self,
zone: &mut SledLocalZone,
zone: &mut SwitchZoneState,
request: SwitchZoneConfig,
filesystems: Vec<zone::Fs>,
data_links: Vec<String>,
) {
let (exit_tx, exit_rx) = oneshot::channel();
*zone = SledLocalZone::Initializing {
*zone = SwitchZoneState::Initializing {
request,
filesystems,
data_links,
worker: Some(Task {
exit_tx,
initializer: tokio::task::spawn(async move {
self.initialize_zone_loop(exit_rx).await
self.initialize_switch_zone_loop(exit_rx).await
}),
}),
};
}

// Moves the current state to align with the "request".
async fn ensure_zone(
async fn ensure_switch_zone(
&self,
request: Option<SwitchZoneConfig>,
filesystems: Vec<zone::Fs>,
Expand All @@ -3891,25 +3894,25 @@ impl ServiceManager {
let zone_typestr = "switch";

match (&mut *sled_zone, request) {
(SledLocalZone::Disabled, Some(request)) => {
(SwitchZoneState::Disabled, Some(request)) => {
info!(log, "Enabling {zone_typestr} zone (new)");
self.clone().start_zone(
self.clone().start_switch_zone(
&mut sled_zone,
request,
filesystems,
data_links,
);
}
(
SledLocalZone::Initializing { request, .. },
SwitchZoneState::Initializing { request, .. },
Some(new_request),
) => {
info!(log, "Enabling {zone_typestr} zone (already underway)");
// The zone has not started yet -- we can simply replace
// the next request with our new request.
*request = new_request;
}
(SledLocalZone::Running { request, zone }, Some(new_request))
(SwitchZoneState::Running { request, zone }, Some(new_request))
if request.addresses != new_request.addresses =>
{
// If the switch zone is running but we have new addresses, it
Expand Down Expand Up @@ -4221,31 +4224,31 @@ impl ServiceManager {
}
}
}
(SledLocalZone::Running { .. }, Some(_)) => {
(SwitchZoneState::Running { .. }, Some(_)) => {
info!(log, "Enabling {zone_typestr} zone (already complete)");
}
(SledLocalZone::Disabled, None) => {
(SwitchZoneState::Disabled, None) => {
info!(log, "Disabling {zone_typestr} zone (already complete)");
}
(SledLocalZone::Initializing { worker, .. }, None) => {
(SwitchZoneState::Initializing { worker, .. }, None) => {
info!(log, "Disabling {zone_typestr} zone (was initializing)");
worker.take().unwrap().stop().await;
*sled_zone = SledLocalZone::Disabled;
*sled_zone = SwitchZoneState::Disabled;
}
(SledLocalZone::Running { zone, .. }, None) => {
(SwitchZoneState::Running { zone, .. }, None) => {
info!(log, "Disabling {zone_typestr} zone (was running)");
let _ = zone.stop().await;
*sled_zone = SledLocalZone::Disabled;
*sled_zone = SwitchZoneState::Disabled;
}
}
Ok(())
}

async fn try_initialize_sled_local_zone(
async fn try_initialize_switch_zone(
&self,
sled_zone: &mut SledLocalZone,
sled_zone: &mut SwitchZoneState,
) -> Result<(), Error> {
let SledLocalZone::Initializing {
let SwitchZoneState::Initializing {
request,
filesystems,
data_links,
Expand All @@ -4269,18 +4272,21 @@ impl ServiceManager {
let zone = self
.initialize_zone(zone_args, filesystems, data_links, None)
.await?;
*sled_zone = SledLocalZone::Running { request: request.clone(), zone };
*sled_zone =
SwitchZoneState::Running { request: request.clone(), zone };
Ok(())
}

// Body of a tokio task responsible for running until the switch zone is
// inititalized, or it has been told to stop.
async fn initialize_zone_loop(&self, mut exit_rx: oneshot::Receiver<()>) {
async fn initialize_switch_zone_loop(
&self,
mut exit_rx: oneshot::Receiver<()>,
) {
loop {
{
let mut sled_zone = self.inner.switch_zone.lock().await;
match self.try_initialize_sled_local_zone(&mut sled_zone).await
{
match self.try_initialize_switch_zone(&mut sled_zone).await {
Ok(()) => return,
Err(e) => warn!(
self.inner.log,
Expand Down

0 comments on commit 945e75f

Please sign in to comment.