Skip to content

Commit

Permalink
handle absence of specs during migration
Browse files Browse the repository at this point in the history
  • Loading branch information
gjcolombo committed Dec 16, 2024
1 parent 3af85e7 commit 8aa1010
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 52 deletions.
9 changes: 7 additions & 2 deletions bin/propolis-cli/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -15,7 +15,9 @@ use anyhow::{anyhow, Context};
use clap::{Parser, Subcommand};
use futures::{future, SinkExt};
use newtype_uuid::{GenericUuid, TypedUuid, TypedUuidKind, TypedUuidTag};
use propolis_client::types::{InstanceMetadata, VersionedInstanceSpec};
use propolis_client::types::{
InstanceMetadata, InstanceSpecStatus, VersionedInstanceSpec,
};
use slog::{o, Drain, Level, Logger};
use tokio::io::{AsyncReadExt, AsyncWriteExt};
use tokio_tungstenite::tungstenite::{
Expand Down Expand Up @@ -504,7 +506,10 @@ async fn migrate_instance(
anyhow!("failed to get src instance properties")
})?;
let src_uuid = src_instance.properties.id;
let VersionedInstanceSpec::V0(spec) = &src_instance.spec;
let InstanceSpecStatus::Present(spec) = &src_instance.spec else {
anyhow::bail!("source instance doesn't have a spec yet");
};
let VersionedInstanceSpec::V0(spec) = &spec;

let request = InstanceEnsureRequest {
properties: InstanceProperties {
Expand Down
128 changes: 85 additions & 43 deletions bin/propolis-server/src/lib/vm/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -87,8 +87,8 @@ use oximeter::types::ProducerRegistry;
use propolis_api_types::{
instance_spec::VersionedInstanceSpec, InstanceEnsureResponse,
InstanceMigrateStatusResponse, InstanceMigrationStatus, InstanceProperties,
InstanceSpecGetResponse, InstanceState, InstanceStateMonitorResponse,
MigrationState,
InstanceSpecGetResponse, InstanceSpecStatus, InstanceState,
InstanceStateMonitorResponse, MigrationState,
};
use slog::info;
use state_driver::StateDriverOutput;
Expand Down Expand Up @@ -204,6 +204,29 @@ struct VmInner {
driver: Option<tokio::task::JoinHandle<StateDriverOutput>>,
}

/// Stores a possibly-absent instance spec with a reason for its absence.
#[derive(Clone, Debug)]
enum MaybeSpec {
Present(Spec),

/// The spec is not known yet because the VM is initializing via live
/// migration, and the source's spec is not available yet.
WaitingForMigrationSource,
}

impl From<MaybeSpec> for InstanceSpecStatus {
fn from(value: MaybeSpec) -> Self {
match value {
MaybeSpec::WaitingForMigrationSource => {
Self::WaitingForMigrationSource
}
MaybeSpec::Present(spec) => {
Self::Present(VersionedInstanceSpec::V0(spec.into()))
}
}
}
}

/// Describes a past or future VM and its properties.
struct VmDescription {
/// Records the VM's last externally-visible state.
Expand All @@ -212,9 +235,6 @@ struct VmDescription {
/// The VM's API-level instance properties.
properties: InstanceProperties,

/// The VM's last-known instance specification.
spec: Spec,

/// The runtime on which the VM's state driver is running (or on which it
/// ran).
tokio_rt: Option<tokio::runtime::Runtime>,
Expand All @@ -228,17 +248,17 @@ enum VmState {

/// A new state driver is attempting to initialize objects for a VM with the
/// ecnlosed description.
WaitingForInit(VmDescription),
WaitingForInit { vm: VmDescription, spec: MaybeSpec },

/// The VM is active, and callers can obtain a handle to its objects.
Active(active::ActiveVm),

/// The previous VM is shutting down, but its objects have not been fully
/// destroyed yet.
Rundown(VmDescription),
Rundown { vm: VmDescription, spec: Spec },

/// The previous VM and its objects have been cleaned up.
RundownComplete(VmDescription),
RundownComplete { vm: VmDescription, spec: MaybeSpec },
}

impl std::fmt::Display for VmState {
Expand All @@ -248,10 +268,10 @@ impl std::fmt::Display for VmState {
"{}",
match self {
Self::NoVm => "NoVm",
Self::WaitingForInit(_) => "WaitingForInit",
Self::WaitingForInit { .. } => "WaitingForInit",
Self::Active(_) => "Active",
Self::Rundown(_) => "Rundown",
Self::RundownComplete(_) => "RundownComplete",
Self::Rundown { .. } => "Rundown",
Self::RundownComplete { .. } => "RundownComplete",
}
)
}
Expand Down Expand Up @@ -329,20 +349,26 @@ impl Vm {
let state = vm.external_state_rx.borrow().clone();
Some(InstanceSpecGetResponse {
properties: vm.properties.clone(),
spec: VersionedInstanceSpec::V0(spec.into()),
spec: InstanceSpecStatus::Present(
VersionedInstanceSpec::V0(spec.into()),
),
state: state.state,
})
}

// If the VM is not active yet, or there is only a
// previously-run-down VM, return the state saved in the state
// machine.
VmState::WaitingForInit(vm)
| VmState::Rundown(vm)
| VmState::RundownComplete(vm) => Some(InstanceSpecGetResponse {
VmState::WaitingForInit { vm, spec }
| VmState::RundownComplete { vm, spec } => {
Some(InstanceSpecGetResponse {
properties: vm.properties.clone(),
state: vm.external_state_rx.borrow().state,
spec: spec.clone().into(),
})
}
VmState::Rundown { vm, spec } => Some(InstanceSpecGetResponse {
properties: vm.properties.clone(),
state: vm.external_state_rx.borrow().state,
spec: VersionedInstanceSpec::V0(vm.spec.clone().into()),
spec: InstanceSpecStatus::Present(VersionedInstanceSpec::V0(
spec.clone().into(),
)),
}),
}
}
Expand All @@ -359,9 +385,9 @@ impl Vm {
match &guard.state {
VmState::NoVm => None,
VmState::Active(vm) => Some(vm.external_state_rx.clone()),
VmState::WaitingForInit(vm)
| VmState::Rundown(vm)
| VmState::RundownComplete(vm) => {
VmState::WaitingForInit { vm, .. }
| VmState::Rundown { vm, .. }
| VmState::RundownComplete { vm, .. } => {
Some(vm.external_state_rx.clone())
}
}
Expand All @@ -386,7 +412,7 @@ impl Vm {
let mut guard = self.inner.write().await;
let old = std::mem::replace(&mut guard.state, VmState::NoVm);
match old {
VmState::WaitingForInit(vm) => {
VmState::WaitingForInit { vm, .. } => {
guard.state = VmState::Active(ActiveVm {
log: log.clone(),
state_driver_queue,
Expand Down Expand Up @@ -417,8 +443,8 @@ impl Vm {
let mut guard = self.inner.write().await;
let old = std::mem::replace(&mut guard.state, VmState::NoVm);
match old {
VmState::WaitingForInit(vm) => {
guard.state = VmState::RundownComplete(vm)
VmState::WaitingForInit { vm, spec } => {
guard.state = VmState::RundownComplete { vm, spec }
}
state => unreachable!(
"start failures should only occur before an active VM is \
Expand Down Expand Up @@ -449,12 +475,14 @@ impl Vm {

let spec = vm.objects().lock_shared().await.instance_spec().clone();
let ActiveVm { external_state_rx, properties, tokio_rt, .. } = vm;
guard.state = VmState::Rundown(VmDescription {
external_state_rx,
properties,
guard.state = VmState::Rundown {
vm: VmDescription {
external_state_rx,
properties,
tokio_rt: Some(tokio_rt),
},
spec,
tokio_rt: Some(tokio_rt),
});
};
vm.services
};

Expand All @@ -473,9 +501,12 @@ impl Vm {
let mut guard = self.inner.write().await;
let old = std::mem::replace(&mut guard.state, VmState::NoVm);
let rt = match old {
VmState::Rundown(mut vm) => {
VmState::Rundown { mut vm, spec } => {
let rt = vm.tokio_rt.take().expect("rundown VM has a runtime");
guard.state = VmState::RundownComplete(vm);
guard.state = VmState::RundownComplete {
vm,
spec: MaybeSpec::Present(spec),
};
rt
}
state => unreachable!(
Expand Down Expand Up @@ -551,16 +582,25 @@ impl Vm {
{
let mut guard = self.inner.write().await;
match guard.state {
VmState::WaitingForInit(_) => {
return Err(VmError::WaitingToInitialize);
VmState::WaitingForInit { .. } => {
return Err(VmError::WaitingToInitialize)
}
VmState::Active { .. } => {
return Err(VmError::AlreadyInitialized)
}
VmState::Rundown { .. } => {
return Err(VmError::RundownInProgress)
}
VmState::Active(_) => return Err(VmError::AlreadyInitialized),
VmState::Rundown(_) => return Err(VmError::RundownInProgress),
_ => {}
};

let properties = ensure_request.properties.clone();
let spec = ensure_request.instance_spec.clone();
let spec = if ensure_request.migrate.is_some() {
MaybeSpec::WaitingForMigrationSource
} else {
MaybeSpec::Present(ensure_request.instance_spec.clone())
};

let vm_for_driver = self.clone();
guard.driver = Some(tokio::spawn(async move {
state_driver::run_state_driver(
Expand All @@ -574,12 +614,14 @@ impl Vm {
.await
}));

guard.state = VmState::WaitingForInit(VmDescription {
external_state_rx: external_rx.clone(),
properties,
guard.state = VmState::WaitingForInit {
vm: VmDescription {
external_state_rx: external_rx.clone(),
properties,
tokio_rt: None,
},
spec,
tokio_rt: None,
});
};
}

// Wait for the state driver task to dispose of this request.
Expand Down
9 changes: 8 additions & 1 deletion crates/propolis-api-types/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -165,11 +165,18 @@ pub struct InstanceGetResponse {
pub instance: Instance,
}

#[derive(Clone, Deserialize, Serialize, JsonSchema)]
#[serde(tag = "type", content = "value")]
pub enum InstanceSpecStatus {
WaitingForMigrationSource,
Present(VersionedInstanceSpec),
}

#[derive(Clone, Deserialize, Serialize, JsonSchema)]
pub struct InstanceSpecGetResponse {
pub properties: InstanceProperties,
pub state: InstanceState,
pub spec: VersionedInstanceSpec,
pub spec: InstanceSpecStatus,
}

#[derive(Clone, Deserialize, Serialize, JsonSchema)]
Expand Down
38 changes: 37 additions & 1 deletion openapi/propolis-server.json
Original file line number Diff line number Diff line change
Expand Up @@ -1431,7 +1431,7 @@
"$ref": "#/components/schemas/InstanceProperties"
},
"spec": {
"$ref": "#/components/schemas/VersionedInstanceSpec"
"$ref": "#/components/schemas/InstanceSpecStatus"
},
"state": {
"$ref": "#/components/schemas/InstanceState"
Expand All @@ -1443,6 +1443,42 @@
"state"
]
},
"InstanceSpecStatus": {
"oneOf": [
{
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"WaitingForMigrationSource"
]
}
},
"required": [
"type"
]
},
{
"type": "object",
"properties": {
"type": {
"type": "string",
"enum": [
"Present"
]
},
"value": {
"$ref": "#/components/schemas/VersionedInstanceSpec"
}
},
"required": [
"type",
"value"
]
}
]
},
"InstanceSpecV0": {
"type": "object",
"properties": {
Expand Down
11 changes: 8 additions & 3 deletions phd-tests/tests/src/cpuid.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@

use cpuid_utils::{CpuidIdent, CpuidValues, CpuidVendor};
use phd_testcase::*;
use propolis_client::types::CpuidEntry;
use propolis_client::types::{
CpuidEntry, InstanceSpecStatus, VersionedInstanceSpec,
};
use tracing::info;

fn cpuid_entry(
Expand Down Expand Up @@ -34,8 +36,11 @@ async fn cpuid_instance_spec_round_trip_test(ctx: &Framework) {
vm.launch().await?;

let spec_get_response = vm.get_spec().await?;
let propolis_client::types::VersionedInstanceSpec::V0(spec) =
spec_get_response.spec;
let InstanceSpecStatus::Present(VersionedInstanceSpec::V0(spec)) =
spec_get_response.spec
else {
panic!("instance spec should be present for a running VM");
};

let cpuid = spec.board.cpuid.expect("board should have explicit CPUID");
assert_eq!(cpuid.entries.len(), entries.len());
Expand Down
8 changes: 6 additions & 2 deletions phd-tests/tests/src/smoke.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@
// file, You can obtain one at https://mozilla.org/MPL/2.0/.

use phd_testcase::*;
use propolis_client::types::{InstanceSpecStatus, VersionedInstanceSpec};

#[phd_testcase]
async fn nproc_test(ctx: &Framework) {
Expand Down Expand Up @@ -46,8 +47,11 @@ async fn instance_spec_get_test(ctx: &Framework) {
vm.launch().await?;

let spec_get_response = vm.get_spec().await?;
let propolis_client::types::VersionedInstanceSpec::V0(spec) =
spec_get_response.spec;
let InstanceSpecStatus::Present(spec) = spec_get_response.spec else {
panic!("launched instance should have a spec");
};

let VersionedInstanceSpec::V0(spec) = spec;
assert_eq!(spec.board.cpus, 4);
assert_eq!(spec.board.memory_mb, 3072);
}

0 comments on commit 8aa1010

Please sign in to comment.