Skip to content

Commit

Permalink
server: move migration failure injection into instance specs (#809)
Browse files Browse the repository at this point in the history
Make the server get migration failure injection device configuration from
an instance spec component instead of the config TOML. This is a prelude to
a larger change that will make the server stop depending on config TOMLs
entirely (in favor of giving clients a library they can use to convert
config TOMLs to instance spec components).

PHD still needs to be able to set different import/export failure settings
for migration sources and targets, so add the migration failure device to
the set of components that can be replaced during preamble processing.
  • Loading branch information
gjcolombo authored Dec 17, 2024
1 parent bf85e35 commit bf824f2
Show file tree
Hide file tree
Showing 11 changed files with 213 additions and 75 deletions.
38 changes: 9 additions & 29 deletions bin/propolis-server/src/lib/initializer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -783,49 +783,29 @@ impl<'a> MachineInitializer<'a> {
}

#[cfg(not(feature = "omicron-build"))]
pub fn initialize_test_devices(
&mut self,
toml_cfg: &std::collections::BTreeMap<
String,
propolis_server_config::Device,
>,
) {
pub fn initialize_test_devices(&mut self) {
use propolis::hw::testdev::{
MigrationFailureDevice, MigrationFailures,
};

if let Some(dev) = toml_cfg.get(MigrationFailureDevice::NAME) {
const FAIL_EXPORTS: &str = "fail_exports";
const FAIL_IMPORTS: &str = "fail_imports";
let fail_exports = dev
.options
.get(FAIL_EXPORTS)
.and_then(|val| val.as_integer())
.unwrap_or(0);
let fail_imports = dev
.options
.get(FAIL_IMPORTS)
.and_then(|val| val.as_integer())
.unwrap_or(0);

if fail_exports <= 0 && fail_imports <= 0 {
if let Some(mig) = &self.spec.migration_failure {
if mig.spec.fail_exports == 0 && mig.spec.fail_imports == 0 {
info!(
self.log,
"migration failure device will not fail, as both
`{FAIL_EXPORTS}` and `{FAIL_IMPORTS}` are 0";
FAIL_EXPORTS => ?fail_exports,
FAIL_IMPORTS => ?fail_imports,
"migration failure device's failure counts are both 0";
"device_spec" => ?mig.spec
);
}

let dev = MigrationFailureDevice::create(
&self.log,
MigrationFailures {
exports: fail_exports as usize,
imports: fail_imports as usize,
exports: mig.spec.fail_exports as usize,
imports: mig.spec.fail_imports as usize,
},
);
self.devices.insert(MigrationFailureDevice::NAME.into(), dev);

self.devices.insert(mig.name.clone(), dev);
}
}

Expand Down
21 changes: 21 additions & 0 deletions bin/propolis-server/src/lib/migrate/preamble.rs
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,27 @@ impl Preamble {
ComponentV0::VirtioNetworkBackend(nic.backend_spec.clone());
}

#[cfg(not(feature = "omicron-build"))]
if let Some(mig) = &target_spec.migration_failure {
let Some(to_amend) = source_spec.components.get_mut(&mig.name)
else {
return Err(MigrateError::InstanceSpecsIncompatible(format!(
"replacement component {} not in source spec",
mig.name
)));
};

if !matches!(to_amend, ComponentV0::MigrationFailureInjector(_)) {
return Err(MigrateError::InstanceSpecsIncompatible(format!(
"component {} is not a migration failure injector \
in the source spec",
mig.name
)));
}

*to_amend = ComponentV0::MigrationFailureInjector(mig.spec.clone());
}

let amended_spec =
source_spec.try_into().map_err(|e: ApiSpecError| {
MigrateError::PreambleParse(e.to_string())
Expand Down
39 changes: 35 additions & 4 deletions bin/propolis-server/src/lib/spec/api_spec_v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,9 @@ use super::{
StorageDevice,
};

#[cfg(not(feature = "omicron-build"))]
use super::MigrationFailure;

#[cfg(feature = "falcon")]
use super::SoftNpuPort;

Expand All @@ -40,9 +43,9 @@ pub(crate) enum ApiSpecError {
#[error("network backend {backend} not found for device {device}")]
NetworkBackendNotFound { backend: String, device: String },

#[cfg(not(feature = "falcon"))]
#[error("softnpu component {0} compiled out")]
SoftNpuCompiledOut(String),
#[allow(dead_code)]
#[error("support for component {component} compiled out via {feature}")]
FeatureCompiledOut { component: String, feature: &'static str },

#[error("backend {0} not used by any device")]
BackendNotUsed(String),
Expand All @@ -61,6 +64,8 @@ impl From<Spec> for InstanceSpecV0 {
serial,
pci_pci_bridges,
pvpanic,
#[cfg(not(feature = "omicron-build"))]
migration_failure,
#[cfg(feature = "falcon")]
softnpu,
} = val;
Expand Down Expand Up @@ -151,6 +156,15 @@ impl From<Spec> for InstanceSpecV0 {
);
}

#[cfg(not(feature = "omicron-build"))]
if let Some(mig) = migration_failure {
insert_component(
&mut spec,
mig.name,
ComponentV0::MigrationFailureInjector(mig.spec),
);
}

#[cfg(feature = "falcon")]
{
if let Some(softnpu_pci) = softnpu.pci_port {
Expand Down Expand Up @@ -292,12 +306,29 @@ impl TryFrom<InstanceSpecV0> for Spec {
// apply it to the builder later.
boot_settings = Some((device_name, settings));
}
#[cfg(feature = "omicron-build")]
ComponentV0::MigrationFailureInjector(_) => {
return Err(ApiSpecError::FeatureCompiledOut {
component: device_name,
feature: "omicron-build",
});
}
#[cfg(not(feature = "omicron-build"))]
ComponentV0::MigrationFailureInjector(mig) => {
builder.add_migration_failure_device(MigrationFailure {
name: device_name,
spec: mig,
})?;
}
#[cfg(not(feature = "falcon"))]
ComponentV0::SoftNpuPciPort(_)
| ComponentV0::SoftNpuPort(_)
| ComponentV0::SoftNpuP9(_)
| ComponentV0::P9fs(_) => {
return Err(ApiSpecError::SoftNpuCompiledOut(device_name));
return Err(ApiSpecError::FeatureCompiledOut {
component: device_name,
feature: "falcon",
});
}
#[cfg(feature = "falcon")]
ComponentV0::SoftNpuPciPort(port) => {
Expand Down
25 changes: 25 additions & 0 deletions bin/propolis-server/src/lib/spec/builder.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,6 +32,9 @@ use super::{
Board, BootOrderEntry, BootSettings, Disk, Nic, QemuPvpanic, SerialPort,
};

#[cfg(not(feature = "omicron-build"))]
use super::MigrationFailure;

#[cfg(feature = "falcon")]
use super::{ParsedSoftNpu, SoftNpuPort};

Expand Down Expand Up @@ -59,6 +62,10 @@ pub(crate) enum SpecBuilderError {
#[error("pvpanic device already specified")]
PvpanicInUse,

#[cfg(not(feature = "omicron-build"))]
#[error("migration failure injection already enabled")]
MigrationFailureInjectionInUse,

#[error("boot settings were already specified")]
BootSettingsInUse,

Expand Down Expand Up @@ -367,6 +374,24 @@ impl SpecBuilder {
Ok(self)
}

#[cfg(not(feature = "omicron-build"))]
pub fn add_migration_failure_device(
&mut self,
mig: MigrationFailure,
) -> Result<&Self, SpecBuilderError> {
if self.component_names.contains(&mig.name) {
return Err(SpecBuilderError::ComponentNameInUse(mig.name));
}

if self.spec.migration_failure.is_some() {
return Err(SpecBuilderError::MigrationFailureInjectionInUse);
}

self.component_names.insert(mig.name.clone());
self.spec.migration_failure = Some(mig);
Ok(self)
}

#[cfg(feature = "falcon")]
pub fn set_softnpu_com4(
&mut self,
Expand Down
13 changes: 13 additions & 0 deletions bin/propolis-server/src/lib/spec/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -34,6 +34,9 @@ use propolis_api_types::instance_spec::{
};
use thiserror::Error;

#[cfg(not(feature = "omicron-build"))]
use propolis_api_types::instance_spec::components::devices::MigrationFailureInjector;

#[cfg(feature = "falcon")]
use propolis_api_types::instance_spec::components::{
backends::DlpiNetworkBackend,
Expand Down Expand Up @@ -71,6 +74,9 @@ pub(crate) struct Spec {
pub pci_pci_bridges: HashMap<String, PciPciBridge>,
pub pvpanic: Option<QemuPvpanic>,

#[cfg(not(feature = "omicron-build"))]
pub migration_failure: Option<MigrationFailure>,

#[cfg(feature = "falcon")]
pub softnpu: SoftNpu,
}
Expand Down Expand Up @@ -278,6 +284,13 @@ pub struct QemuPvpanic {
pub spec: QemuPvpanicDesc,
}

#[cfg(not(feature = "omicron-build"))]
#[derive(Clone, Debug)]
pub struct MigrationFailure {
pub name: String,
pub spec: MigrationFailureInjector,
}

#[cfg(feature = "falcon")]
#[derive(Clone, Debug)]
pub struct SoftNpuPort {
Expand Down
2 changes: 1 addition & 1 deletion bin/propolis-server/src/lib/vm/ensure.rs
Original file line number Diff line number Diff line change
Expand Up @@ -403,7 +403,7 @@ async fn initialize_vm_objects(
init.initialize_network_devices(&chipset).await?;

#[cfg(not(feature = "omicron-build"))]
init.initialize_test_devices(&options.toml_config.devices);
init.initialize_test_devices();
#[cfg(feature = "omicron-build")]
info!(log, "`omicron-build` feature enabled, ignoring any test devices");

Expand Down
15 changes: 15 additions & 0 deletions crates/propolis-api-types/src/instance_spec/components/devices.rs
Original file line number Diff line number Diff line change
Expand Up @@ -185,3 +185,18 @@ pub struct P9fs {
/// The PCI path at which to attach the guest to this P9 filesystem.
pub pci_path: PciPath,
}

/// Describes a synthetic device that registers for VM lifecycle notifications
/// and returns errors during attempts to migrate.
///
/// This is only supported by Propolis servers compiled without the
/// `omicron-build` feature.
#[derive(Clone, Deserialize, Serialize, Debug, JsonSchema)]
#[serde(deny_unknown_fields)]
pub struct MigrationFailureInjector {
/// The number of times this device should fail requests to export state.
pub fail_exports: u32,

/// The number of times this device should fail requests to import state.
pub fail_imports: u32,
}
1 change: 1 addition & 0 deletions crates/propolis-api-types/src/instance_spec/v0.rs
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@ pub enum ComponentV0 {
SoftNpuPort(components::devices::SoftNpuPort),
SoftNpuP9(components::devices::SoftNpuP9),
P9fs(components::devices::P9fs),
MigrationFailureInjector(components::devices::MigrationFailureInjector),
CrucibleStorageBackend(components::backends::CrucibleStorageBackend),
FileStorageBackend(components::backends::FileStorageBackend),
BlobStorageBackend(components::backends::BlobStorageBackend),
Expand Down
2 changes: 1 addition & 1 deletion lib/propolis/src/hw/testdev.rs
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,7 @@ pub struct MigrationFailures {
struct MigrationFailurePayloadV1 {}

impl MigrationFailureDevice {
pub const NAME: &'static str = "test-migration-failure";
const NAME: &'static str = "test-migration-failure";

pub fn create(log: &slog::Logger, fail: MigrationFailures) -> Arc<Self> {
let log =
Expand Down
42 changes: 42 additions & 0 deletions openapi/propolis-server.json
Original file line number Diff line number Diff line change
Expand Up @@ -783,6 +783,25 @@
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
"component": {
"$ref": "#/components/schemas/MigrationFailureInjector"
},
"type": {
"type": "string",
"enum": [
"MigrationFailureInjector"
]
}
},
"required": [
"component",
"type"
],
"additionalProperties": false
},
{
"type": "object",
"properties": {
Expand Down Expand Up @@ -1571,6 +1590,29 @@
"vcr_json"
]
},
"MigrationFailureInjector": {
"description": "Describes a synthetic device that registers for VM lifecycle notifications and returns errors during attempts to migrate.\n\nThis is only supported by Propolis servers compiled without the `omicron-build` feature.",
"type": "object",
"properties": {
"fail_exports": {
"description": "The number of times this device should fail requests to export state.",
"type": "integer",
"format": "uint32",
"minimum": 0
},
"fail_imports": {
"description": "The number of times this device should fail requests to import state.",
"type": "integer",
"format": "uint32",
"minimum": 0
}
},
"required": [
"fail_exports",
"fail_imports"
],
"additionalProperties": false
},
"MigrationState": {
"type": "string",
"enum": [
Expand Down
Loading

0 comments on commit bf824f2

Please sign in to comment.