diff --git a/Cargo.lock b/Cargo.lock index 28fe09d67..a7bc82541 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -664,6 +664,17 @@ dependencies = [ "libc", ] +[[package]] +name = "cpuid_profile_config" +version = "0.0.0" +dependencies = [ + "propolis", + "serde", + "serde_derive", + "thiserror", + "toml 0.7.6", +] + [[package]] name = "crc32fast" version = "1.3.2" @@ -3031,6 +3042,7 @@ dependencies = [ "crucible-client-types", "dladm", "dlpi", + "enum-iterator", "erased-serde", "futures", "ispf", @@ -3174,6 +3186,7 @@ dependencies = [ name = "propolis-server-config" version = "0.0.0" dependencies = [ + "cpuid_profile_config", "serde", "serde_derive", "thiserror", @@ -3212,6 +3225,7 @@ dependencies = [ name = "propolis-standalone-config" version = "0.0.0" dependencies = [ + "cpuid_profile_config", "num_enum 0.5.11", "serde", "serde_derive", diff --git a/Cargo.toml b/Cargo.toml index e32306364..bd5f42f5c 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -43,6 +43,7 @@ panic = "abort" # Internal crates bhyve_api = { path = "crates/bhyve-api" } bhyve_api_sys = { path = "crates/bhyve-api/sys" } +cpuid_profile_config = { path = "crates/cpuid-profile-config" } dladm = { path = "crates/dladm" } propolis-server-config = { path = "crates/propolis-server-config" } propolis-standalone-config = { path = "crates/propolis-standalone-config" } diff --git a/bin/propolis-server/src/lib/spec.rs b/bin/propolis-server/src/lib/spec.rs index 5c8dca073..271bc4883 100644 --- a/bin/propolis-server/src/lib/spec.rs +++ b/bin/propolis-server/src/lib/spec.rs @@ -603,13 +603,11 @@ impl ServerSpecBuilder { #[cfg(test)] mod test { - use std::{collections::BTreeMap, path::PathBuf}; - use crucible_client_types::VolumeConstructionRequest; use propolis_client::handmade::api::Slot; use uuid::Uuid; - use crate::config::{self, Config}; + use crate::config::Config; use super::*; @@ -625,13 +623,7 @@ mod test { memory: 512, vcpus: 4, }, - &Config::new( - PathBuf::from_str("").unwrap(), - config::Chipset::default(), - 
BTreeMap::new(), - BTreeMap::new(), - Vec::new(), - ), + &Config::default(), ) } diff --git a/bin/propolis-standalone/README.md b/bin/propolis-standalone/README.md index ee16dfbf5..a82d7173e 100644 --- a/bin/propolis-standalone/README.md +++ b/bin/propolis-standalone/README.md @@ -101,9 +101,10 @@ fi After you've got the bootrom, an ISO, a VNIC, and a configuration file that points to them, you're ready to create and run your VM. To do so, make sure you've done the following: -- [build propolis](#Building) -- run the [propolis-server](#propolis-server) -- create your VM, run it, and hop on the serial console using [propolis-cli](#propolis-cli) +- build `propolis-standalone` +- start `propolis-standalone`, passing it a valid config +- it will wait to start the VM until you connect to the serial console socket + (with something like [sercons](https://github.com/jclulow/vmware-sercons)) - login to the VM as root (no password) - optionally, run `setup-alpine` to configure the VM (including setting a root password) @@ -190,3 +191,63 @@ generation = 1 # read_only = false # === END OPTIONAL OPTIONS === ``` +## Configuring `cpuid` + +Rather than using the built-in `cpuid` data masking offered by the bhyve kernel +VMM, propolis-standalone can load a set of leaf data to be used by the instance. +An example of such configuration data is as follows: + +```toml +[main] +# ... 
other main config bits +cpuid_profile = "NAME" + +[cpuid.NAME] +vendor = "amd" +"0" = [0x10, 0x68747541, 0x444d4163, 0x69746e65] +"1" = [0x830f10, 0x10800, 0xf6d83203, 0x178bfbff] +"5" = [0x0, 0x0, 0x0, 0x0] +"6" = [0x4, 0x0, 0x0, 0x0] +"7" = [0x0, 0x0, 0x0, 0x0] +"7-0" = [0x0, 0x201401a9, 0x0, 0x0] +"d" = [0x0, 0x0, 0x0, 0x0] +"d-0" = [0x7, 0x340, 0x340, 0x0] +"d-1" = [0x1, 0x0, 0x0, 0x0] +"d-2" = [0x100, 0x240, 0x0, 0x0] +"80000000" = [0x80000020, 0x68747541, 0x444d4163, 0x69746e65] +"80000001" = [0x830f10, 0x40000000, 0x444031fb, 0x25d3fbff] +"80000002" = [0x20444d41, 0x43595045, 0x38323720, 0x36312032] +"80000003" = [0x726f432d, 0x72502065, 0x7365636f, 0x20726f73] +"80000004" = [0x20202020, 0x20202020, 0x20202020, 0x202020] +"80000005" = [0xff40ff40, 0xff40ff40, 0x20080140, 0x20080140] +"80000006" = [0x48006400, 0x68006400, 0x2006140, 0x2009140] +"80000007" = [0x0, 0x0, 0x0, 0x100] +"80000008" = [0x3030, 0x7, 0x0, 0x10000] +"8000000a" = [0x1, 0x8000, 0x0, 0x13bcff] +"80000019" = [0xf040f040, 0x0, 0x0, 0x0] +"8000001a" = [0x6, 0x0, 0x0, 0x0] +"8000001b" = [0x3ff, 0x0, 0x0, 0x0] +"8000001d" = [0x0, 0x0, 0x0, 0x0] +"8000001d-0" = [0x121, 0x1c0003f, 0x3f, 0x0] +"8000001d-1" = [0x122, 0x1c0003f, 0x3f, 0x0] +"8000001d-2" = [0x143, 0x1c0003f, 0x3ff, 0x2] +"8000001d-3" = [0x163, 0x3c0003f, 0x3fff, 0x1] +"8000001f" = [0x1000f, 0x16f, 0x1fd, 0x1] +``` + +If `cpuid_profile` is specified under the `main` section, a corresponding +`cpuid` section with a matching name is expected to be defined elsewhere in the +file. The `vendor` field under that section controls fallback behavior when a +vCPU queries a non-existent leaf, and other CPU-specific behavior. After that, +the leafs and their register data are listed. Leafs which require an `ecx` +match (with `eax` as the function, and `ecx` as the index) are specified with a +hyphen separating the function and index. 
Leafs without an index (just a single +hex number) will match only against `eax`, and at a lower priority than the +function/index leafs which match `eax` and `ecx`. The data for leafs is +expected to be a 4-item array of 32-bit integers corresponding to `eax`, `ebx`, +`ecx`, and `edx`, in that order. + +Certain fields in `cpuid` data depend on aspects specific to the host (such as +vCPU count) or the vCPU they are associated with (such as APIC ID). Propolis +will "specialize" the data provided in the `cpuid` profile with logic appropriate +for the specific leafs involved. diff --git a/bin/propolis-standalone/src/config.rs b/bin/propolis-standalone/src/config.rs index 6d612063e..45cd7465b 100644 --- a/bin/propolis-standalone/src/config.rs +++ b/bin/propolis-standalone/src/config.rs @@ -11,11 +11,12 @@ use anyhow::Context; use serde::Deserialize; use propolis::block; +use propolis::cpuid; use propolis::hw::pci::Bdf; use propolis::inventory::ChildRegister; -use propolis_standalone_config::Device; pub use propolis_standalone_config::{Config, SnapshotTag}; +use propolis_standalone_config::{CpuVendor, CpuidEntry, Device}; #[derive(Deserialize)] struct FileConfig { @@ -119,6 +120,42 @@ pub fn parse_bdf(v: &str) -> Option { } } +/// Build the `cpuid` leaf set for the profile selected by `config`. +/// +/// Returns `Ok(None)` when `Config::cpuid_profile()` yields no profile, and +/// an error when the profile fails to parse or two of its entries resolve to +/// the same function/index pair. +/// +/// NOTE(review): a `main.cpuid_profile` name with no matching `[cpuid.NAME]` +/// section also yields `None` here rather than an error -- confirm intended. +pub fn parse_cpuid(config: &Config) -> anyhow::Result> { + if let Some(profile) = config.cpuid_profile() { + let vendor = match profile.vendor { + CpuVendor::Amd => cpuid::VendorKind::Amd, + CpuVendor::Intel => cpuid::VendorKind::Intel, + }; + let mut set = cpuid::Set::new(vendor); + let entries: Vec = profile.try_into()?; + for entry in entries { + let conflict = set.insert( + cpuid::Ident(entry.func, entry.idx), + cpuid::Entry::from(entry.values), + ); + + if conflict.is_some() { + anyhow::bail!( + "conflicting entry at func:{:#?} idx:{:#?}", + entry.func, + entry.idx + ) + } + } + Ok(Some(set)) + } else { + Ok(None) + } +} + #[cfg(feature = "crucible")] fn create_crucible_backend( be: &propolis_standalone_config::BlockDevice, diff --git 
a/bin/propolis-standalone/src/main.rs b/bin/propolis-standalone/src/main.rs index ad56862e7..5cbf4f37e 100644 --- a/bin/propolis-standalone/src/main.rs +++ b/bin/propolis-standalone/src/main.rs @@ -923,7 +923,26 @@ fn setup_instance( inv.register(&fwcfg_dev)?; inv.register(&ramfb)?; + let cpuid_profile = config::parse_cpuid(&config)?; + for vcpu in machine.vcpus.iter() { + let vcpu_profile = if let Some(profile) = cpuid_profile.as_ref() { + propolis::cpuid::Specializer::new() + .with_vcpu_count( + std::num::NonZeroU8::new(config.main.cpus).unwrap(), + true, + ) + .with_vcpuid(vcpu.id) + .with_cache_topo() + .clear_cpu_topo(cpuid::TopoKind::all()) + .execute(profile.clone()) + .context("failed to specialize cpuid profile")? + } else { + // An empty set will instruct the kernel to use the legacy + // fallback behavior + propolis::cpuid::Set::new_host() + }; + vcpu.set_cpuid(vcpu_profile)?; vcpu.set_default_capabs()?; } drop(guard); diff --git a/bin/propolis-utils/README.md b/bin/propolis-utils/README.md index 449725779..e5d5b2c79 100644 --- a/bin/propolis-utils/README.md +++ b/bin/propolis-utils/README.md @@ -7,7 +7,7 @@ development activities, but are otherwise not meant for general consumption. - `savex`: Extract data fields from a `propolis-standalone` instance which was halted and saved with the `-s` flag. -- `cpuid-gen`: Generated CPUID profile using the legacy emulated output from the +- `cpuid-gen`: Generate a `cpuid` profile using the legacy emulated output from the local host CPU, as filtered by the kernel VMM logic. 
- `rsrvrctl`: Manipulate the kernel VMM memory reservoir in the same manner offered by the utility shipped by the OS diff --git a/bin/propolis-utils/src/bin/cpuid-gen.rs b/bin/propolis-utils/src/bin/cpuid-gen.rs index 38706ebb2..73f68cbfc 100644 --- a/bin/propolis-utils/src/bin/cpuid-gen.rs +++ b/bin/propolis-utils/src/bin/cpuid-gen.rs @@ -96,7 +96,7 @@ impl PartialOrd for CpuidKey { } } -/// Query CPUID through bhyve-defined masks +/// Query `cpuid` through bhyve-defined masks fn query_cpuid(vm: &VmmFd, eax: u32, ecx: u32) -> anyhow::Result { let mut data = bhyve_api::vm_legacy_cpuid { vlc_eax: eax, @@ -107,7 +107,7 @@ fn query_cpuid(vm: &VmmFd, eax: u32, ecx: u32) -> anyhow::Result { Ok(Cpuid::from(&data)) } -/// Query CPUID directly from host CPU +/// Query `cpuid` directly from host CPU fn query_raw_cpuid(eax: u32, ecx: u32) -> Cpuid { let mut res = Cpuid::default(); diff --git a/crates/bhyve-api/src/lib.rs b/crates/bhyve-api/src/lib.rs index 9f4f72664..f3868af40 100644 --- a/crates/bhyve-api/src/lib.rs +++ b/crates/bhyve-api/src/lib.rs @@ -578,7 +578,7 @@ pub enum ApiVersion { /// Made hlt-on-exit a required CPU feature, and enabled by default in vmm V6 = 6, - /// Adds ability to control CPUID results for guest vCPUs + /// Adds ability to control `cpuid` results for guest vCPUs V5 = 5, } impl ApiVersion { diff --git a/crates/bhyve-api/sys/src/structs.rs b/crates/bhyve-api/sys/src/structs.rs index 8805ae3ab..b9b9b4339 100644 --- a/crates/bhyve-api/sys/src/structs.rs +++ b/crates/bhyve-api/sys/src/structs.rs @@ -392,16 +392,57 @@ impl Default for vm_data_xfer { pub const VCE_FLAG_MATCH_INDEX: u32 = 1 << 0; #[repr(C)] -#[derive(Copy, Clone)] -struct vcpu_cpuid_entry { - vce_function: u32, - vce_index: u32, - vce_flags: u32, - vce_eax: u32, - vce_ebx: u32, - vce_ecx: u32, - vce_edx: u32, - _pad: u32, +#[derive(Copy, Clone, Default)] +pub struct vcpu_cpuid_entry { + pub vce_function: u32, + pub vce_index: u32, + pub vce_flags: u32, + pub vce_eax: u32, + pub 
vce_ebx: u32, + pub vce_ecx: u32, + pub vce_edx: u32, + pub _pad: u32, +} +impl vcpu_cpuid_entry { + fn match_idx(&self) -> bool { + self.vce_flags & VCE_FLAG_MATCH_INDEX != 0 + } + /// Order entries for proper cpuid evaluation by the kernel VMM. + /// + /// Bhyve expects that cpuid entries are sorted by function, and then index, + /// from least to greatest. Entries which must match on index should come + /// before (less-than) those that do not, so the former can take precedence + /// in matching. + /// + /// This function is provided so that a list of entries can be easily sorted + /// prior to loading them into the kernel VMM. + /// + /// ``` + /// let mut entries: Vec = vec![ + /// // entries loaded here + /// ]; + /// entries.sort_by(vcpu_cpuid_entry::eval_sort); + /// let config = vm_vcpu_cpuid_config { + /// vvcc_vcpuid: 0, + /// vvcc_flags: 0, + /// vvcc_nent: entries.len(), + /// vvcc_entries: &mut entries, + /// }; + /// // perform ioctl(VM_SET_CPUID, &config) ... + /// ``` + pub fn eval_sort(a: &Self, b: &Self) -> std::cmp::Ordering { + use std::cmp::Ordering; + + match a.vce_function.cmp(&b.vce_function) { + Ordering::Equal => match (a.match_idx(), b.match_idx()) { + (true, false) => Ordering::Less, + (false, true) => Ordering::Greater, + (true, true) | (false, false) => a.vce_index.cmp(&b.vce_index), + }, + + ord => ord, + } + } } /// Use legacy hard-coded cpuid masking tables applied to the host CPU @@ -421,6 +462,17 @@ pub struct vm_vcpu_cpuid_config { pub _pad: u32, pub vvcc_entries: *mut c_void, } +impl Default for vm_vcpu_cpuid_config { + fn default() -> Self { + Self { + vvcc_vcpuid: 0, + vvcc_flags: 0, + vvcc_nent: 0, + _pad: 0, + vvcc_entries: std::ptr::null_mut(), + } + } +} #[repr(C)] #[derive(Copy, Clone, Default)] diff --git a/crates/cpuid-profile-config/Cargo.toml b/crates/cpuid-profile-config/Cargo.toml new file mode 100644 index 000000000..10f3f135a --- /dev/null +++ b/crates/cpuid-profile-config/Cargo.toml @@ -0,0 +1,16 @@ +[package] 
+name = "cpuid_profile_config" +version = "0.0.0" +license = "MPL-2.0" +edition = "2021" + +[lib] +test = false +doctest = false + +[dependencies] +serde.workspace = true +serde_derive.workspace = true +toml.workspace = true +propolis.workspace = true +thiserror.workspace = true diff --git a/crates/cpuid-profile-config/src/lib.rs b/crates/cpuid-profile-config/src/lib.rs new file mode 100644 index 000000000..0a0f6ff4a --- /dev/null +++ b/crates/cpuid-profile-config/src/lib.rs @@ -0,0 +1,85 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. + +use std::collections::BTreeMap; + +use serde::{Deserialize, Serialize}; + +#[derive(Deserialize, Serialize, Copy, Clone, Debug, PartialEq)] +#[serde(rename_all = "lowercase")] +pub enum CpuVendor { + Amd, + Intel, +} + +#[derive(Serialize, Deserialize, Debug, PartialEq, Clone)] +pub struct CpuidProfile { + pub vendor: CpuVendor, + #[serde(flatten, default)] + pub leaf: BTreeMap, +} + +/// `cpuid` entry parsed from a configured profile +#[derive(Copy, Clone)] +pub struct CpuidEntry { + /// Function (eax) to match for `cpuid` leaf + pub func: u32, + /// Index (ecx) to (optionally) match for `cpuid` leaf + pub idx: Option, + + /// Values (eax, ebx, ecx, edx) for `cpuid` leaf + pub values: [u32; 4], +} + +#[derive(Debug, thiserror::Error)] +pub enum CpuidParseError { + #[error("Unable to parse leaf {0}: {1}")] + Leaf(String, std::num::ParseIntError), + #[error("Unable to parse values: {0}")] + Values(&'static str), +} + +impl TryFrom<&CpuidProfile> for Vec { + type Error = CpuidParseError; + + fn try_from(value: &CpuidProfile) -> Result { + let mut entries = Vec::with_capacity(value.leaf.len()); + + for (leaf, values) in value.leaf.iter() { + let (func, idx) = match leaf.split_once('-') { + None => ( + u32::from_str_radix(leaf, 16) + .map_err(|e| 
CpuidParseError::Leaf(leaf.clone(), e))?, + None, + ), + Some((func_part, idx_part)) => ( + u32::from_str_radix(func_part, 16) + .map_err(|e| CpuidParseError::Leaf(leaf.clone(), e))?, + Some( + u32::from_str_radix(idx_part, 16).map_err(|e| { + CpuidParseError::Leaf(leaf.clone(), e) + })?, + ), + ), + }; + let raw_regs = values + .as_array() + .ok_or(CpuidParseError::Values("expected array of values"))?; + if raw_regs.len() != 4 { + return Err(CpuidParseError::Values("expected 4 cpuid values")); + } + let mut values = [0u32; 4]; + for (v, raw) in values.iter_mut().zip(raw_regs.iter()) { + let num = raw.as_integer().ok_or(CpuidParseError::Values( + "leaf values must be numeric", + ))?; + *v = u32::try_from(num).map_err(|_e| { + CpuidParseError::Values("leaf values must be valid u32") + })?; + } + entries.push(CpuidEntry { func, idx, values }); + } + Ok(entries) + } +} diff --git a/crates/propolis-server-config/Cargo.toml b/crates/propolis-server-config/Cargo.toml index 26b4503a4..e5259e418 100644 --- a/crates/propolis-server-config/Cargo.toml +++ b/crates/propolis-server-config/Cargo.toml @@ -9,6 +9,7 @@ test = false doctest = false [dependencies] +cpuid_profile_config.workspace = true serde.workspace = true serde_derive.workspace = true toml.workspace = true diff --git a/crates/propolis-server-config/src/lib.rs b/crates/propolis-server-config/src/lib.rs index 1dbed1eae..0776d684f 100644 --- a/crates/propolis-server-config/src/lib.rs +++ b/crates/propolis-server-config/src/lib.rs @@ -9,6 +9,8 @@ use std::str::FromStr; use serde_derive::{Deserialize, Serialize}; use thiserror::Error; +pub use cpuid_profile_config::CpuidProfile; + /// Configuration for the Propolis server. // NOTE: This is expected to change over time; portions of the hard-coded // configuration will likely become more dynamic. 
@@ -27,26 +29,19 @@ pub struct Config { #[serde(default, rename = "block_dev")] pub block_devs: BTreeMap, + + #[serde(default, rename = "cpuid")] + pub cpuid_profiles: BTreeMap, } -impl Config { - /// Constructs a new configuration object. - /// - /// Typically, the configuration is parsed from a config - /// file via [`parse`], but this method allows an alternative - /// mechanism for initialization. - pub fn new>( - bootrom: P, - chipset: Chipset, - devices: BTreeMap, - block_devs: BTreeMap, - pci_bridges: Vec, - ) -> Config { - Config { - bootrom: bootrom.into(), - pci_bridges, - chipset, - devices, - block_devs, +impl Default for Config { + fn default() -> Self { + Self { + bootrom: PathBuf::new(), + pci_bridges: Vec::new(), + chipset: Chipset { options: BTreeMap::new() }, + devices: BTreeMap::new(), + block_devs: BTreeMap::new(), + cpuid_profiles: BTreeMap::new(), } } } @@ -153,15 +148,68 @@ mod tests { #[test] fn config_can_be_serialized_as_toml() { - let dummy_config = Config::new( - "/boot", - Chipset { options: BTreeMap::new() }, - BTreeMap::new(), - BTreeMap::new(), - Vec::new(), - ); + let dummy_config = + Config { bootrom: "/boot".into(), ..Default::default() }; let serialized = toml::ser::to_string(&dummy_config).unwrap(); let deserialized: Config = toml::de::from_str(&serialized).unwrap(); assert_eq!(dummy_config, deserialized); } + + #[test] + fn parse_basic_config() { + let raw = r#" +bootrom = "/path/to/bootrom" +[chipset] +chipset-opt = "copt" + +[dev.drv0] +driver = "nvme" +other-opt = "value" + +[dev.drv1] +driver = "widget" +foo = "bar" + +[block_dev.block0] +type = "cement" +slump = "4in" + +[block_dev.block1] +type = "file" +path = "/etc/passwd" +"#; + let cfg: Config = toml::de::from_str(raw).unwrap(); + + use std::path::PathBuf; + use toml::Value; + + assert_eq!(cfg.bootrom, PathBuf::from("/path/to/bootrom")); + assert_eq!(cfg.chipset.get_string("chipset-opt"), Some("copt")); + + assert!(cfg.devices.get("drv0").is_some()); + 
assert!(cfg.devices.get("drv1").is_some()); + let dev0 = cfg.devices.get("drv0").unwrap(); + let dev1 = cfg.devices.get("drv1").unwrap(); + + assert_eq!(dev0.driver, "nvme"); + assert_eq!(dev0.get_string("other-opt"), Some("value")); + assert_eq!(dev1.driver, "widget"); + assert_eq!(dev1.get_string("foo"), Some("bar")); + + assert!(cfg.block_devs.get("block0").is_some()); + assert!(cfg.block_devs.get("block1").is_some()); + let bdev0 = cfg.block_devs.get("block0").unwrap(); + let bdev1 = cfg.block_devs.get("block1").unwrap(); + + assert_eq!(bdev0.bdtype, "cement"); + assert_eq!( + bdev0.options.get("slump").map(Value::as_str).unwrap(), + Some("4in") + ); + assert_eq!(bdev1.bdtype, "file"); + assert_eq!( + bdev1.options.get("path").map(Value::as_str).unwrap(), + Some("/etc/passwd") + ); + } } diff --git a/crates/propolis-standalone-config/Cargo.toml b/crates/propolis-standalone-config/Cargo.toml index 13036edf0..b5eadccbd 100644 --- a/crates/propolis-standalone-config/Cargo.toml +++ b/crates/propolis-standalone-config/Cargo.toml @@ -9,6 +9,7 @@ test = false doctest = false [dependencies] +cpuid_profile_config.workspace = true num_enum.workspace = true serde.workspace = true serde_derive.workspace = true diff --git a/crates/propolis-standalone-config/src/lib.rs b/crates/propolis-standalone-config/src/lib.rs index cfec31465..b37f39b60 100644 --- a/crates/propolis-standalone-config/src/lib.rs +++ b/crates/propolis-standalone-config/src/lib.rs @@ -7,6 +7,8 @@ use std::collections::BTreeMap; use num_enum::{IntoPrimitive, TryFromPrimitive}; use serde::{Deserialize, Serialize}; +pub use cpuid_profile_config::*; + #[derive(TryFromPrimitive, IntoPrimitive, Eq, PartialEq)] #[repr(u8)] pub enum SnapshotTag { @@ -26,6 +28,17 @@ pub struct Config { #[serde(default, rename = "block_dev")] pub block_devs: BTreeMap, + + #[serde(default, rename = "cpuid")] + pub cpuid_profiles: BTreeMap, +} +impl Config { + pub fn cpuid_profile(&self) -> Option<&CpuidProfile> { + match 
self.main.cpuid_profile.as_ref() { + Some(name) => self.cpuid_profiles.get(name), + None => None, + } + } } #[derive(Clone, Debug, Deserialize, Serialize)] @@ -35,6 +48,7 @@ pub struct Main { pub bootrom: String, pub memory: usize, pub use_reservoir: Option, + pub cpuid_profile: Option, } /// A hard-coded device, either enabled by default or accessible locally diff --git a/lib/propolis/Cargo.toml b/lib/propolis/Cargo.toml index 9a94f7eee..aec678b2a 100644 --- a/lib/propolis/Cargo.toml +++ b/lib/propolis/Cargo.toml @@ -11,6 +11,7 @@ bitflags.workspace = true bitstruct.workspace = true byteorder.workspace = true lazy_static.workspace = true +enum-iterator.workspace = true num_enum.workspace = true thiserror.workspace = true bhyve_api.workspace = true diff --git a/lib/propolis/src/cpuid.rs b/lib/propolis/src/cpuid.rs new file mode 100644 index 000000000..b35ecda1e --- /dev/null +++ b/lib/propolis/src/cpuid.rs @@ -0,0 +1,454 @@ +// This Source Code Form is subject to the terms of the Mozilla Public +// License, v. 2.0. If a copy of the MPL was not distributed with this +// file, You can obtain one at https://mozilla.org/MPL/2.0/. 
+ +#![allow(unused)] + +use std::cmp::Ordering; +use std::collections::BTreeMap; +use std::collections::BTreeSet; +use std::num::NonZeroU8; +use std::ops::Bound; + +use bhyve_api::vcpu_cpuid_entry; +use enum_iterator::Sequence; + +/// Values for a cpuid leaf +#[derive(Copy, Clone, Debug)] +pub struct Entry { + pub eax: u32, + pub ebx: u32, + pub ecx: u32, + pub edx: u32, +} +impl Entry { + pub fn zero() -> Self { + Self { eax: 0, ebx: 0, ecx: 0, edx: 0 } + } +} +impl From<[u32; 4]> for Entry { + fn from(value: [u32; 4]) -> Self { + Self { eax: value[0], ebx: value[1], ecx: value[2], edx: value[3] } + } +} + +/// Matching criteria for function (%eax) and sub-function (%ecx) to identify a +/// specific leaf of cpuid information +#[derive(Eq, PartialEq, Ord, PartialOrd, Copy, Clone, Debug)] +pub struct Ident( + /// Function (%eax) value + pub u32, + /// Sub-function (%ecx) value, if any + pub Option, +); + +/// Set of cpuid leafs +#[derive(Clone)] +pub struct Set { + map: BTreeMap, + pub vendor: VendorKind, +} + +impl Set { + pub fn new(vendor: VendorKind) -> Self { + Set { map: BTreeMap::new(), vendor } + } + pub fn new_host() -> Self { + let vendor = VendorKind::try_from(host_query(Ident(0, None))) + .expect("host CPU should be from recognized vendor"); + Self::new(vendor) + } + + pub fn insert(&mut self, ident: Ident, entry: Entry) -> Option { + self.map.insert(ident, entry) + } + pub fn remove(&mut self, ident: Ident) -> Option { + self.map.remove(&ident) + } + pub fn remove_all(&mut self, func: u32) { + self.map.retain(|ident, _val| ident.0 != func); + } + pub fn get(&self, ident: Ident) -> Option<&Entry> { + self.map.get(&ident) + } + pub fn get_mut(&mut self, ident: Ident) -> Option<&mut Entry> { + self.map.get_mut(&ident) + } + pub fn is_empty(&self) -> bool { + self.map.is_empty() + } + + pub fn iter(&self) -> Iter { + Iter(self.map.iter()) + } + + pub fn for_regs(&self, eax: u32, ecx: u32) -> Option { + if let Some(ent) = self.map.get(&Ident(eax, Some(ecx))) 
{ + // Exact match + Some(*ent) + } else if let Some(ent) = self.map.get(&Ident(eax, None)) { + // Function-only match + Some(*ent) + } else { + None + } + } +} +impl From for Vec { + fn from(value: Set) -> Self { + let mut out = Vec::with_capacity(value.map.len()); + out.extend(value.map.iter().map(|(ident, leaf)| { + let vce_flags = match ident.1.as_ref() { + Some(_) => bhyve_api::VCE_FLAG_MATCH_INDEX, + None => 0, + }; + bhyve_api::vcpu_cpuid_entry { + vce_function: ident.0, + vce_index: ident.1.unwrap_or(0), + vce_flags, + vce_eax: leaf.eax, + vce_ebx: leaf.ebx, + vce_ecx: leaf.ecx, + vce_edx: leaf.edx, + ..Default::default() + } + })); + out + } +} + +/// Convert a [vcpu_cpuid_entry](bhyve_api::vcpu_cpuid_entry) into an ([Ident], +/// [Entry]) tuple, suitable for insertion into a [Set]. +/// +/// This would be implemented as a [From] trait if rust let us. +pub fn from_raw(value: bhyve_api::vcpu_cpuid_entry) -> (Ident, Entry) { + let idx = if value.vce_flags & bhyve_api::VCE_FLAG_MATCH_INDEX != 0 { + Some(value.vce_index) + } else { + None + }; + + ( + Ident(value.vce_function, idx), + Entry { + eax: value.vce_eax, + ebx: value.vce_ebx, + ecx: value.vce_ecx, + edx: value.vce_edx, + }, + ) +} + +pub struct Iter<'a>(std::collections::btree_map::Iter<'a, Ident, Entry>); +impl<'a> Iterator for Iter<'a> { + type Item = (&'a Ident, &'a Entry); + + fn next(&mut self) -> Option { + self.0.next() + } +} + +#[derive(Debug, thiserror::Error)] +pub enum SpecializeError { + #[error("unsupported cache level")] + UnsupportedCacheLevel, + #[error("missing vcpu count")] + MissingVcpuCount, +} + +/// Specialize a set of cpuid leafs for provided attributes. +/// +/// This includes things such as a CPU topology (cores/threads/etc), a given +/// vCPU ID (APIC, core/thread ID, etc), or other info tidbits. 
+#[derive(Default)] +pub struct Specializer { + has_smt: bool, + num_vcpu: Option, + vcpuid: Option, + vendor_kind: Option, + cpu_topo_populate: BTreeSet, + cpu_topo_clear: BTreeSet, + do_cache_topo: bool, +} +impl Specializer { + pub fn new() -> Self { + Self::default() + } + + /// Specify number of vCPUs in instance, and if SMT is enabled + pub fn with_vcpu_count(self, count: NonZeroU8, has_smt: bool) -> Self { + Self { num_vcpu: Some(count), has_smt, ..self } + } + + /// Specify vCPU ID to specialize for + pub fn with_vcpuid(self, vcpuid: i32) -> Self { + assert!((vcpuid as usize) < bhyve_api::VM_MAXCPU); + Self { vcpuid: Some(vcpuid), ..self } + } + + /// Specify CPU vendor + pub fn with_vendor(self, vendor: VendorKind) -> Self { + Self { vendor_kind: Some(vendor), ..self } + } + + /// Specify CPU topology types to render into the specialized [Set] + /// + /// Without basic information such as the number of vCPUs (set by + /// [`Self::with_vcpu_count()`]), population of the requested topology + /// information may be incomplete. + pub fn with_cpu_topo( + self, + populate: impl Iterator, + ) -> Self { + let mut cpu_topo_populate = BTreeSet::new(); + + for t in populate { + cpu_topo_populate.insert(t); + } + + Self { cpu_topo_populate, ..self } + } + + /// Specify CPU topology types to clear from the specialized [Set] + /// + /// Some leafs in the provided set may not match expectations for the given + /// CPU vendor. Without populating it with generated data (via + /// [`Self::with_cpu_topo()`]), those leafs can be cleared out. 
+ pub fn clear_cpu_topo(self, clear: impl Iterator) -> Self { + let mut cpu_topo_clear = BTreeSet::new(); + for t in clear { + cpu_topo_clear.insert(t); + } + + Self { cpu_topo_clear, ..self } + } + + /// Update cache topology information for specified vCPU count and SMT + /// capabilities + pub fn with_cache_topo(self) -> Self { + Self { do_cache_topo: true, ..self } + } + + /// Given the attributes and modifiers specified in this [Specializer], + /// render an updated [Set] reflecting those data. + pub fn execute(self, mut set: Set) -> Result { + // Use vendor override if provided, or else the existing one + if let Some(vendor) = self.vendor_kind { + set.vendor = vendor; + } + match set.vendor { + VendorKind::Amd => { + if self.do_cache_topo && self.num_vcpu.is_some() { + self.fix_amd_cache_topo(&mut set)?; + } + } + _ => {} + } + + // apply any requested topo info fixups + self.fix_cpu_topo(&mut set)?; + + // APIC ID based on vcpuid + if let Some(vcpuid) = self.vcpuid.as_ref() { + if let Some(ent) = set.get_mut(Ident(0x1, None)) { + // bits 31:24 contain initial APIC ID + ent.ebx &= !0xff000000; + ent.ebx |= ((*vcpuid as u32) & 0xff) << 24; + } + } + + // logical CPU count (if SMT is enabled) + if let Some(num_vcpu) = self.num_vcpu.as_ref() { + if self.has_smt { + if let Some(ent) = set.get_mut(Ident(0x1, None)) { + ent.edx |= (0x1 << 28); + // bits 23:16 contain max IDs for logical CPUs in package + ent.ebx &= !0xff0000; + ent.ebx |= (num_vcpu.get() as u32) << 16; + } + } + } + + Ok(set) + } + + fn fix_amd_cache_topo(&self, set: &mut Set) -> Result<(), SpecializeError> { + assert!(self.do_cache_topo); + let num = self.num_vcpu.unwrap().get(); + for ecx in 0..u32::MAX { + match set.get_mut(Ident(0x8000001d, Some(ecx))) { + None => break, + Some(vals) => { + // bits 7:5 hold the cache level: mask first, as `>>` binds tighter than `&` + let visible_count = match (vals.eax & 0b11100000) >> 5 { + 0b001 | 0b010 => { + // L1/L2 shared by SMT siblings + if self.has_smt { + 2 + } else { + 1 + } + } + 0b011 => { + 
// L3 shared by all vCPUs + // TODO: segregate by sockets, if configured + num as u32 + } + _ => { + // unceremonious handling of unexpected cache levels + return Err(SpecializeError::UnsupportedCacheLevel); + } + }; + // the number of logical CPUs (minus 1) sharing this cache + // is stored in bits 25:14 + vals.eax &= !(0xfff << 14); + vals.eax |= (visible_count - 1) << 14; + } + } + } + Ok(()) + } + fn fix_cpu_topo(&self, set: &mut Set) -> Result<(), SpecializeError> { + for topo in self.cpu_topo_populate.union(&self.cpu_topo_clear) { + // Nuke any existing info in order to potentially override it + let leaf = *topo as u32; + set.remove_all(leaf); + + if !self.cpu_topo_populate.contains(topo) { + continue; + } + + let num_vcpu = self + .num_vcpu + .ok_or(SpecializeError::MissingVcpuCount) + .map(|n| n.get() as u32)?; + + match topo { + TopoKind::StdB => { + // Queries with invalid ecx will get all-zeroes + set.insert(Ident(leaf, None), Entry::zero()); + if self.has_smt { + set.insert( + Ident(leaf, Some(0)), + Entry { + eax: 0x1, + ebx: 0x2, + ecx: 0x100, + // TODO: populate with x2APIC ID + edx: 0x0, + }, + ); + } else { + set.insert( + Ident(leaf, Some(0)), + Entry { + eax: 0x0, + ebx: 0x1, + ecx: 0x100, + // TODO: populate with x2APIC ID + edx: 0x0, + }, + ); + } + set.insert( + Ident(leaf, Some(1)), + Entry { + eax: 0x0, + ebx: num_vcpu, + ecx: 0x201, + // TODO: populate with x2APIC ID + edx: 0x0, + }, + ); + } + TopoKind::Std1F => { + // TODO: add 0x1f topo info + } + TopoKind::Ext1E => { + let id = self.vcpuid.unwrap_or(0) as u32; + let mut ebx = id; + if self.has_smt { + // bits 15:8 hold the zero-based threads-per-compute-unit + ebx |= 0x100; + } + set.insert( + Ident(leaf, None), + Entry { + eax: id, + ebx, + // TODO: populate ecx info? 
+ ecx: 0, + edx: 0, + }, + ); + } + } + } + Ok(()) + } +} + +/// Flavors of CPU topology information +#[derive(Clone, Copy, Ord, PartialOrd, Eq, PartialEq, Sequence)] +pub enum TopoKind { + /// Leaf 0xB AMD (and legacy on Intel) + StdB = 0xb, + /// Leaf 0x1F (Intel) + Std1F = 0x1f, + /// LEAF 0x8000001E (AMD) + Ext1E = 0x8000001e, +} +impl TopoKind { + pub fn all() -> enum_iterator::All { + enum_iterator::all() + } +} + +/// Flavors of CPU vendor for cpuid specialization +#[derive(Clone, Copy)] +pub enum VendorKind { + Amd, + Intel, +} +impl VendorKind { + pub fn is_intel(self) -> bool { + matches!(self, VendorKind::Intel) + } +} +impl TryFrom for VendorKind { + type Error = &'static str; + + fn try_from(value: Entry) -> Result { + match (value.ebx, value.ecx, value.edx) { + // AuthenticAmd + (0x68747541, 0x444d4163, 0x69746e65) => Ok(VendorKind::Amd), + // GenuineIntel + (0x756e6547, 0x6c65746e, 0x49656e69) => Ok(VendorKind::Intel), + _ => Err("unrecognized vendor"), + } + } +} + +#[cfg(any(target_arch = "x86", target_arch = "x86_64"))] +pub fn host_query(ident: Ident) -> Entry { + let mut res = Entry::zero(); + + unsafe { + std::arch::asm!( + "push rbx", + "cpuid", + "mov {0:e}, ebx", + "pop rbx", + out(reg) res.ebx, + // select cpuid 0, also specify eax as clobbered + inout("eax") ident.0 => res.eax, + inout("ecx") ident.1.unwrap_or(0) => res.ecx, + out("edx") res.edx, + ); + } + res +} +#[cfg(not(any(target_arch = "x86", target_arch = "x86_64")))] +pub fn host_query(_ident: Ident) -> Entry { + panic!("this is not going to work on non-x86") +} diff --git a/lib/propolis/src/lib.rs b/lib/propolis/src/lib.rs index de3140d75..35a0f26c1 100644 --- a/lib/propolis/src/lib.rs +++ b/lib/propolis/src/lib.rs @@ -2,8 +2,12 @@ // License, v. 2.0. If a copy of the MPL was not distributed with this // file, You can obtain one at https://mozilla.org/MPL/2.0/. 
-#![allow(clippy::style)] -#![allow(clippy::drop_non_drop)] +#![allow( + clippy::style, + + // Propolis will only ever be built as 64-bit, so wider enums are acceptable + clippy::enum_clike_unportable_variant +)] pub extern crate bhyve_api; pub extern crate usdt; @@ -15,6 +19,7 @@ pub mod api_version; pub mod block; pub mod chardev; pub mod common; +pub mod cpuid; pub mod exits; pub mod hw; pub mod instance; diff --git a/lib/propolis/src/vcpu.rs b/lib/propolis/src/vcpu.rs index 07fb4034e..9a69c184b 100644 --- a/lib/propolis/src/vcpu.rs +++ b/lib/propolis/src/vcpu.rs @@ -4,9 +4,10 @@ //! Virtual CPU functionality. -use std::io::Result; +use std::io::{Error, ErrorKind, Result}; use std::sync::Arc; +use crate::cpuid; use crate::exits::*; use crate::inventory::Entity; use crate::migrate::*; @@ -139,6 +140,106 @@ impl Vcpu { Ok(req.desc) } + /// Configure the (in-kernel) `cpuid` emulation state for this vCPU. + /// + /// If `values` contains no cpuid entries, then legacy emulation handling + /// will be used. + pub fn set_cpuid(&self, values: cpuid::Set) -> Result<()> { + let mut config = bhyve_api::vm_vcpu_cpuid_config { + vvcc_vcpuid: self.id, + ..Default::default() + }; + if values.is_empty() { + config.vvcc_flags = bhyve_api::VCC_FLAG_LEGACY_HANDLING; + unsafe { + self.hdl.ioctl(bhyve_api::VM_SET_CPUID, &mut config)?; + } + } else { + if values.vendor.is_intel() { + config.vvcc_flags |= bhyve_api::VCC_FLAG_INTEL_FALLBACK; + } + let mut entries: Vec = values.into(); + entries.sort_by(bhyve_api::vcpu_cpuid_entry::eval_sort); + config.vvcc_nent = entries.len() as u32; + config.vvcc_entries = entries.as_mut_ptr() as *mut libc::c_void; + unsafe { + self.hdl.ioctl(bhyve_api::VM_SET_CPUID, &mut config)?; + } + } + + Ok(()) + } + + /// Query the configured (in-kernel) `cpuid` emulation state for this vCPU. + /// + /// If legacy cpuid handling is configured, the resulting [Set](cpuid::Set) + /// will contain no entries. 
+ pub fn get_cpuid(&self) -> Result<cpuid::Set> { + let mut config = bhyve_api::vm_vcpu_cpuid_config { + vvcc_vcpuid: self.id, + vvcc_nent: 0, + ..Default::default() + }; + // Query the number of entries configured in-kernel + // + // We expect an error (E2BIG) when attempting a VM_GET_CPUID with a + // vvcc_nent which falls below the number of entries stored in the + // kernel. When that occurs, vvcc_nent will be updated with that + // existing count so we may allocate an array to receive it on a + // subsequent ioctl. + let count = match unsafe { + self.hdl.ioctl(bhyve_api::VM_GET_CPUID, &mut config) + } { + Err(_) if config.vvcc_nent != 0 => Ok(config.vvcc_nent), + Ok(_) => { + assert_eq!(config.vvcc_nent, 0); + Ok(0) + } + Err(e) => Err(e), + }?; + + let mut entries = + vec![bhyve_api::vcpu_cpuid_entry::default(); count as usize]; + config.vvcc_entries = entries.as_mut_ptr() as *mut libc::c_void; + unsafe { + self.hdl.ioctl(bhyve_api::VM_GET_CPUID, &mut config)?; + } + + if config.vvcc_flags & bhyve_api::VCC_FLAG_LEGACY_HANDLING != 0 { + // Since the legacy handling takes care of vendor-specific handling + // (by nature of doing the cpuid queries against the host CPU) it + // ignores the INTEL_FALLBACK flag. We must determine the vendor + // kind by querying it. 
+ let vendor = cpuid::VendorKind::try_from(cpuid::host_query( + cpuid::Ident(0, None), + )) + .map_err(|e| Error::new(ErrorKind::InvalidData, e.to_string()))?; + + return Ok(cpuid::Set::new(vendor)); + } + let intel_fallback = + config.vvcc_flags & bhyve_api::VCC_FLAG_INTEL_FALLBACK != 0; + let mut set = cpuid::Set::new(match intel_fallback { + true => cpuid::VendorKind::Intel, + false => cpuid::VendorKind::Amd, + }); + + for entry in entries { + let (ident, value) = cpuid::from_raw(entry); + let conflict = set.insert(ident, value); + if conflict.is_some() { + return Err(Error::new( + ErrorKind::InvalidData, + format!( + "conflicting entry at eax:{:x} ecx:{:x?}", + ident.0, ident.1 + ), + )); + } + } + Ok(set) + } + /// Issues a command to reset all state for the virtual CPU (including registers and /// pending interrupts). pub fn reboot_state(&self) -> Result<()> { @@ -353,6 +454,7 @@ impl MigrateMulti for Vcpu { output.push(migrate::VcpuMsrsV1::read(self)?.into())?; output.push(migrate::FpuStateV1::read(self)?.into())?; output.push(migrate::LapicV1::read(self)?.into())?; + output.push(migrate::CpuidV1::read(self)?.into())?; Ok(()) } @@ -370,6 +472,7 @@ impl MigrateMulti for Vcpu { let ms_regs: migrate::VcpuMsrsV1 = offer.take()?; let fpu: migrate::FpuStateV1 = offer.take()?; let lapic: migrate::LapicV1 = offer.take()?; + let cpuid: migrate::CpuidV1 = offer.take()?; run_state.write(self)?; gp_regs.write(self)?; @@ -379,6 +482,7 @@ impl MigrateMulti for Vcpu { ms_regs.write(self)?; fpu.write(self)?; lapic.write(self)?; + cpuid.write(self)?; Ok(()) } @@ -389,6 +493,7 @@ pub mod migrate { use std::{convert::TryInto, io}; use super::Vcpu; + use crate::cpuid; use crate::migrate::*; use bhyve_api::{vdi_field_entry_v1, vm_reg_name}; @@ -570,6 +675,84 @@ pub mod migrate { pub dcr_timer: u32, } + #[derive(Copy, Clone, Default, Deserialize, Serialize)] + pub struct CpuidEntV1 { + pub func: u32, + pub idx: Option<u32>, + pub data: [u32; 4], + } + impl From<CpuidEntV1> for (cpuid::Ident, 
cpuid::Entry) { + fn from(value: CpuidEntV1) -> Self { + ( + cpuid::Ident(value.func, value.idx), + cpuid::Entry { + eax: value.data[0], + ebx: value.data[1], + ecx: value.data[2], + edx: value.data[3], + }, + ) + } + } + + #[derive(Copy, Clone, Deserialize, Serialize)] + #[serde(rename_all = "lowercase")] + pub enum CpuidVendorV1 { + Amd, + Intel, + } + impl From for CpuidVendorV1 { + fn from(value: cpuid::VendorKind) -> Self { + match value { + cpuid::VendorKind::Amd => Self::Amd, + cpuid::VendorKind::Intel => Self::Intel, + } + } + } + impl From for cpuid::VendorKind { + fn from(value: CpuidVendorV1) -> Self { + match value { + CpuidVendorV1::Amd => Self::Amd, + CpuidVendorV1::Intel => Self::Intel, + } + } + } + + #[derive(Clone, Deserialize, Serialize)] + pub struct CpuidV1 { + pub vendor: CpuidVendorV1, + pub entries: Vec, + } + impl Schema<'_> for CpuidV1 { + fn id() -> SchemaId { + ("bhyve-x86-cpuid", 1) + } + } + impl From for CpuidV1 { + fn from(value: cpuid::Set) -> Self { + let vendor = value.vendor.into(); + let entries: Vec<_> = value + .iter() + .map(|(k, v)| CpuidEntV1 { + func: k.0, + idx: k.1, + data: [v.eax, v.ebx, v.ecx, v.edx], + }) + .collect(); + CpuidV1 { vendor, entries } + } + } + impl From for cpuid::Set { + fn from(value: CpuidV1) -> Self { + let mut set = cpuid::Set::new(value.vendor.into()); + for item in value.entries { + let (ident, value) = item.into(); + set.insert(ident, value); + } + set + } + } + impl From<(bhyve_api::seg_desc, u16)> for SegDesc { fn from(value: (bhyve_api::seg_desc, u16)) -> Self { let (desc, selector) = value; @@ -1081,6 +1264,15 @@ pub mod migrate { Ok(()) } } + impl VcpuReadWrite for CpuidV1 { + fn read(vcpu: &Vcpu) -> Result { + Ok(vcpu.get_cpuid()?.into()) + } + + fn write(self, vcpu: &Vcpu) -> Result<()> { + vcpu.set_cpuid(self.into()) + } + } } mod bits { diff --git a/phd-tests/framework/src/test_vm/vm_config.rs b/phd-tests/framework/src/test_vm/vm_config.rs index acf490543..f3139b6f2 100644 --- 
a/phd-tests/framework/src/test_vm/vm_config.rs +++ b/phd-tests/framework/src/test_vm/vm_config.rs @@ -5,7 +5,6 @@ //! Structures that express how a VM should be configured. use std::{ - collections::BTreeMap, io::Write, path::{Path, PathBuf}, sync::Arc, @@ -175,13 +174,10 @@ impl ConfigRequest { } fn write_config_toml(&self, toml_path: &Path) -> anyhow::Result<()> { - let config = config::Config::new( - self.bootrom_path.clone(), - config::Chipset { options: BTreeMap::default() }, - BTreeMap::new(), - BTreeMap::new(), - Vec::new(), - ); + let config = config::Config { + bootrom: self.bootrom_path.clone(), + ..Default::default() + }; let serialized = toml::ser::to_string(&config).unwrap(); let mut cfg_file = std::fs::OpenOptions::new()