diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index ca54882c..d11f5455 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -4,10 +4,16 @@ on: [push] jobs: build: + strategy: + matrix: + os: [ubuntu-latest, windows-latest] - runs-on: ubuntu-latest + runs-on: ${{ matrix.os }} steps: + - name: Disable autocrlf (Windows) + if: runner.os == 'Windows' + run: git config --global core.autocrlf false - uses: actions/checkout@v4 with: submodules: true @@ -19,14 +25,18 @@ jobs: - uses: Swatinem/rust-cache@v2 - name: Run cargo build run: cargo build --release --features default-networks - - name: Check if code is formatted + - name: Check if code is formatted (Linux) + if: runner.os == 'Linux' run: cargo fmt --check - - name: Run Clippy + - name: Run Clippy (Linux) + if: runner.os == 'Linux' run: scripts/ci/clippy.bash --deny warnings - name: Run tests run: cargo test --release --no-fail-fast - - name: Check consensus-spec-tests coverage + - name: Check consensus-spec-tests coverage (Linux) + if: runner.os == 'Linux' run: scripts/ci/consensus-spec-tests-coverage.rb # Ignore RUSTSEC-2023-0071 because we don't use RSA in `jwt-simple` - - name: Run cargo audit + - name: Run cargo audit (Linux) + if: runner.os == 'Linux' run: cargo audit --ignore RUSTSEC-2024-0370 --ignore RUSTSEC-2023-0071 diff --git a/Cargo.toml b/Cargo.toml index 7183fb16..9c415647 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -504,6 +504,7 @@ try_from_iterator = { path = 'try_from_iterator' } types = { path = 'types' } validator = { path = 'validator' } validator_key_cache = { path = 'validator_key_cache' } +winsafe = { git = 'https://github.com/rodrigocfd/winsafe', features = ['kernel', 'psapi'] } # Banned crates # diff --git a/ad_hoc_bench/Cargo.toml b/ad_hoc_bench/Cargo.toml index f5ea5c3b..725078b9 100644 --- a/ad_hoc_bench/Cargo.toml +++ b/ad_hoc_bench/Cargo.toml @@ -16,7 +16,9 @@ eth2_cache_utils = { workspace = true } fork_choice_control = { workspace = true } 
fork_choice_store = { workspace = true } futures = { workspace = true } -jemalloc-ctl = { workspace = true } log = { workspace = true } rand = { workspace = true } types = { workspace = true } + +[target.'cfg(not(windows))'.dependencies] +jemalloc-ctl = { workspace = true } \ No newline at end of file diff --git a/ad_hoc_bench/src/main.rs b/ad_hoc_bench/src/main.rs index 3254d347..b7368b29 100644 --- a/ad_hoc_bench/src/main.rs +++ b/ad_hoc_bench/src/main.rs @@ -2,13 +2,11 @@ use core::ops::RangeInclusive; use std::{collections::BTreeMap, sync::Arc, time::Instant}; use allocator as _; -use anyhow::{Error, Result}; -use bytesize::ByteSize; +use anyhow::Result; use clap::{Parser, ValueEnum}; use eth2_cache_utils::{goerli, holesky, holesky_devnet, mainnet, medalla, withdrawal_devnet_4}; use fork_choice_control::AdHocBenchController; use fork_choice_store::StoreConfig; -use jemalloc_ctl::Result as JemallocResult; use log::info; use rand::seq::SliceRandom as _; use types::{ @@ -247,7 +245,7 @@ impl From for BlockParameters { fn main() -> Result<()> { binary_utils::initialize_logger(module_path!(), false)?; binary_utils::initialize_rayon()?; - + #[cfg(not(target_os = "windows"))] print_jemalloc_stats()?; let options = Options::parse(); @@ -296,7 +294,7 @@ fn main() -> Result<()> { |_, _| BTreeMap::new(), ), }?; - + #[cfg(not(target_os = "windows"))] print_jemalloc_stats()?; Ok(()) @@ -312,6 +310,7 @@ fn run( beacon_blocks: impl FnOnce(RangeInclusive, usize) -> Vec>>, blob_sidecars: impl FnOnce(RangeInclusive, usize) -> BTreeMap>>>, ) -> Result<()> { + #[cfg(not(target_os = "windows"))] print_jemalloc_stats()?; let Options { @@ -422,11 +421,15 @@ fn run( info!("average block throughput: {block_throughput:.3} blocks/s"); info!("average slot throughput: {slot_throughput:.3} slots/s"); - print_jemalloc_stats() + #[cfg(not(target_os = "windows"))] + print_jemalloc_stats()?; + + Ok(()) } +#[cfg(not(target_os = "windows"))] fn print_jemalloc_stats() -> Result<()> { - 
jemalloc_ctl::epoch::advance().map_err(Error::msg)?; + jemalloc_ctl::epoch::advance().map_err(anyhow::Error::msg)?; info!( "allocated: {}, \ @@ -445,9 +448,9 @@ fn print_jemalloc_stats() -> Result<()> { Ok(()) } - -fn human_readable_size(result: JemallocResult) -> Result { - let size = result.map_err(Error::msg)?; +#[cfg(not(target_os = "windows"))] +fn human_readable_size(result: jemalloc_ctl::Result) -> Result { + let size = result.map_err(anyhow::Error::msg)?; let size = size.try_into()?; - Ok(ByteSize(size).to_string_as(true)) + Ok(bytesize::ByteSize(size).to_string_as(true)) } diff --git a/metrics/Cargo.toml b/metrics/Cargo.toml index 69be48e3..0c4f7815 100644 --- a/metrics/Cargo.toml +++ b/metrics/Cargo.toml @@ -20,7 +20,6 @@ futures = { workspace = true } grandine_version = { workspace = true } helper_functions = { workspace = true } http_api_utils = { workspace = true } -jemalloc-ctl = { workspace = true } log = { workspace = true } num_threads = { workspace = true } p2p = { workspace = true } @@ -28,7 +27,6 @@ prometheus = { workspace = true } # `prometheus-client` is only needed for libp2p metrics. 
prometheus-client = { workspace = true }
 prometheus_metrics = { workspace = true }
-psutil = { workspace = true }
 reqwest = { workspace = true }
 serde = { workspace = true }
 std_ext = { workspace = true }
@@ -40,5 +38,12 @@ tower-http = { workspace = true }
 transition_functions = { workspace = true }
 types = { workspace = true }
 
+[target.'cfg(not(windows))'.dependencies]
+jemalloc-ctl = { workspace = true }
+psutil = { workspace = true }
+
+[target.'cfg(windows)'.dependencies]
+winsafe = { workspace = true }
+
 [dev-dependencies]
 serde_json = { workspace = true }
diff --git a/metrics/src/beaconchain.rs b/metrics/src/beaconchain.rs
index 4d2a3cc0..68017866 100644
--- a/metrics/src/beaconchain.rs
+++ b/metrics/src/beaconchain.rs
@@ -15,7 +15,6 @@ use helper_functions::{accessors, predicates};
 use log::warn;
 use p2p::metrics::PEERS_CONNECTED;
 use prometheus::IntGauge;
-use psutil::{cpu::CpuTimes, process::Process};
 use serde::Serialize;
 use sysinfo::{Disks, System};
 use types::{preset::Preset, traits::BeaconState};
@@ -88,29 +87,18 @@ pub struct ProcessMetrics {
 
 impl ProcessMetrics {
     pub fn get() -> Self {
-        let mut cpu_process_seconds_total = 0;
-        let mut memory_process_bytes = 0;
-
-        match Process::current() {
-            Ok(process) => {
-                match process.cpu_times() {
-                    Ok(cpu_times) => {
-                        cpu_process_seconds_total = cpu_times.busy().as_secs()
-                            + cpu_times.children_system().as_secs()
-                            + cpu_times.children_system().as_secs();
-                    }
-                    Err(error) => warn!("unable to get current process CPU usage: {error:?}"),
-                }
-
-                match process.memory_info() {
-                    Ok(mem_info) => {
-                        memory_process_bytes = mem_info.rss();
-                    }
-                    Err(error) => warn!("unable to get process memory usage: {error:?}"),
-                }
-            }
-            Err(error) => warn!("unable to get current process: {error:?}"),
-        }
+        // Process metrics are best-effort: on failure, log a warning and report zeros
+        // instead of panicking.
+        let crate::metric_sys::ProcessCpuMetric {
+            cpu_process_seconds_total,
+            memory_process_bytes,
+        } = crate::metric_sys::get_process_cpu_metric().unwrap_or_else(|error| {
+            warn!("unable to get current process's cpu metric: {error:?}");
+            crate::metric_sys::ProcessCpuMetric {
+                cpu_process_seconds_total: 0,
+                memory_process_bytes: 0,
+            }
+        });
 
         let client_build = DateTime::parse_from_rfc3339(build_time_utc!())
             .expect("unable to parse build time")
@@ -286,12 +274,16 @@ struct PlatformSpecificSystemMetrics {
 
 impl PlatformSpecificSystemMetrics {
     #[cfg(not(target_os = "linux"))]
-    fn new(_cpu: Option<&CpuTimes>) -> Self {
+    fn new() -> Self {
         Self::default()
     }
 
     #[cfg(target_os = "linux")]
-    fn new(cpu: Option<&CpuTimes>) -> Self {
+    fn new() -> Self {
+        let cpu_times = psutil::cpu::cpu_times()
+            .map_err(|error| warn!("unable to get CPU times information: {error:?}"))
+            .ok();
+        let cpu = cpu_times.as_ref();
         let mem = psutil::memory::virtual_memory()
             .map_err(|error| warn!("unable to get virtual memory information: {error:?}"))
             .ok();
@@ -325,19 +317,24 @@ impl SystemMetrics {
         let (network_node_bytes_total_receive, network_node_bytes_total_transmit) =
             helpers::get_network_bytes();
 
-        let cpu_times = psutil::cpu::cpu_times()
-            .map_err(|error| warn!("unable to get CPU times information: {error:?}"))
-            .ok();
-
-        let cpu = cpu_times.as_ref();
+        // System CPU metrics are best-effort: on failure, log a warning and report zeros
+        // instead of panicking.
+        let cpu_metric = crate::metric_sys::get_cpu_metric().unwrap_or_else(|error| {
+            warn!("unable to get current system's cpu metric: {error:?}");
+            crate::metric_sys::CpuMetric {
+                idle_seconds: 0,
+                system_seconds: 0,
+                user_seconds: 0,
+            }
+        });
 
         Self {
             // CPU
             cpu_cores: system.physical_core_count().unwrap_or_default(),
             cpu_threads: system.cpus().len(),
-            cpu_node_system_seconds_total: cpu.map(CpuTimes::total).unwrap_or_default().as_secs(),
-            cpu_node_user_seconds_total: cpu.map(CpuTimes::user).unwrap_or_default().as_secs(),
-            cpu_node_idle_seconds_total: cpu.map(CpuTimes::idle).unwrap_or_default().as_secs(),
+            cpu_node_system_seconds_total: cpu_metric.system_seconds,
+            cpu_node_user_seconds_total: cpu_metric.user_seconds,
+            cpu_node_idle_seconds_total: cpu_metric.idle_seconds,
 
             // memory
             memory_node_bytes_total: system.total_memory(),
@@ -359,7 +356,7 @@ impl SystemMetrics {
             misc_os: metrics_os(),
 
             // platform specific metrics
-            platform_specific_metrics: PlatformSpecificSystemMetrics::new(cpu),
+            platform_specific_metrics: PlatformSpecificSystemMetrics::new(),
         }
     }
 }
diff --git
a/metrics/src/lib.rs b/metrics/src/lib.rs
index 891cd4cb..93227543 100644
--- a/metrics/src/lib.rs
+++ b/metrics/src/lib.rs
@@ -8,5 +8,6 @@ mod beaconchain;
 mod gui;
 mod helpers;
 mod messages;
+mod metric_sys;
 mod server;
 mod service;
diff --git a/metrics/src/metric_sys.rs b/metrics/src/metric_sys.rs
new file mode 100644
index 00000000..01567ca0
--- /dev/null
+++ b/metrics/src/metric_sys.rs
@@ -0,0 +1,147 @@
+//! Platform-specific collection of process- and system-level CPU and memory metrics.
+//!
+//! Windows is served by `winsafe`; every other target goes through `psutil`.
+
+use anyhow::Result;
+#[cfg(target_os = "windows")]
+use {
+    log::debug,
+    winsafe::{self as w, prelude::*},
+};
+#[cfg(not(target_os = "windows"))]
+use {
+    anyhow::anyhow,
+    log::warn,
+    psutil::{
+        cpu::{self, CpuTimes},
+        process::Process,
+    },
+};
+
+/// CPU time and memory consumed by the current process.
+#[derive(Debug)]
+pub struct ProcessCpuMetric {
+    /// Whole seconds of CPU time used by the process.
+    pub cpu_process_seconds_total: u64,
+    /// Memory used by the process in bytes (RSS via `psutil`, working set on Windows).
+    pub memory_process_bytes: u64,
+}
+
+/// Reads CPU time and resident memory of the current process through `psutil`.
+///
+/// # Errors
+///
+/// Fails if the current process, its CPU times or its memory info cannot be queried.
+#[cfg(not(target_os = "windows"))]
+pub fn get_process_cpu_metric() -> Result<ProcessCpuMetric> {
+    let process = Process::current()
+        .map_err(|error| anyhow!("unable to get current process: {error:?}"))?;
+    let cpu_times = process
+        .cpu_times()
+        .map_err(|error| anyhow!("unable to get current process CPU usage: {error:?}"))?;
+    let mem_info = process
+        .memory_info()
+        .map_err(|error| anyhow!("unable to get process memory usage: {error:?}"))?;
+
+    // Children contribute both their user and their system time.
+    let cpu_process_seconds_total = cpu_times.busy().as_secs()
+        + cpu_times.children_user().as_secs()
+        + cpu_times.children_system().as_secs();
+
+    Ok(ProcessCpuMetric {
+        cpu_process_seconds_total,
+        memory_process_bytes: mem_info.rss(),
+    })
+}
+
+/// Reads CPU time and working set size of the current process through the Windows API.
+///
+/// # Errors
+///
+/// Fails if `GetProcessTimes` or `GetProcessMemoryInfo` fails, or if the working set size
+/// does not fit in a `u64`.
+#[cfg(target_os = "windows")]
+pub fn get_process_cpu_metric() -> Result<ProcessCpuMetric> {
+    let proc = w::HPROCESS::GetCurrentProcess();
+
+    // Only the kernel and user times (the last two values) are needed.
+    let (_, _, kernel, user) = proc.GetProcessTimes()?;
+    let cpu_process_seconds_total = filetime_to_seconds(kernel) + filetime_to_seconds(user);
+    debug!("CPU time: {cpu_process_seconds_total} seconds");
+
+    let mem_info = proc.GetProcessMemoryInfo()?;
+    let memory_process_bytes = mem_info.WorkingSetSize.try_into()?;
+    debug!("memory usage: {memory_process_bytes} bytes");
+
+    Ok(ProcessCpuMetric {
+        cpu_process_seconds_total,
+        memory_process_bytes,
+    })
+}
+
+/// System-wide CPU time counters, in whole seconds.
+#[derive(Debug)]
+pub struct CpuMetric {
+    pub idle_seconds: u64,
+    pub system_seconds: u64,
+    pub user_seconds: u64,
+}
+
+/// Reads system-wide CPU times through `psutil`.
+///
+/// Best-effort: if the counters cannot be read, a warning is logged and all values are zero,
+/// so this implementation never returns `Err`.
+#[cfg(not(target_os = "windows"))]
+pub fn get_cpu_metric() -> Result<CpuMetric> {
+    let cpu_times = cpu::cpu_times()
+        .map_err(|error| warn!("unable to get CPU times information: {error:?}"))
+        .ok();
+    let cpu = cpu_times.as_ref();
+
+    Ok(CpuMetric {
+        idle_seconds: cpu.map(CpuTimes::idle).unwrap_or_default().as_secs(),
+        // "System" is fed from `CpuTimes::total`, not from the kernel-only time.
+        system_seconds: cpu.map(CpuTimes::total).unwrap_or_default().as_secs(),
+        user_seconds: cpu.map(CpuTimes::user).unwrap_or_default().as_secs(),
+    })
+}
+
+/// Reads system-wide CPU times through `GetSystemTimes`.
+///
+/// # Errors
+///
+/// Fails if `GetSystemTimes` fails.
+#[cfg(target_os = "windows")]
+pub fn get_cpu_metric() -> Result<CpuMetric> {
+    let (idle_time, kernel_time, user_time) = w::GetSystemTimes()?;
+
+    let idle = filetime_to_seconds(idle_time);
+    let kernel = filetime_to_seconds(kernel_time);
+    let user = filetime_to_seconds(user_time);
+
+    // Kernel time includes idle time. `saturating_sub` keeps an unexpected `idle > kernel`
+    // reading from overflowing.
+    // NOTE(review): the `psutil` implementation reports `CpuTimes::total` as system time,
+    // while this reports kernel-minus-idle; confirm which one the metric is meant to carry.
+    let system = kernel.saturating_sub(idle);
+
+    Ok(CpuMetric {
+        idle_seconds: idle,
+        system_seconds: system,
+        user_seconds: user,
+    })
+}
+
+/// Converts a `FILETIME` holding a duration into whole seconds.
+///
+/// `FILETIME` is a 64-bit count of 100-nanosecond intervals, split into two 32-bit halves
+/// (see <https://learn.microsoft.com/en-us/windows/win32/api/minwinbase/ns-minwinbase-filetime>).
+/// It normally denotes a point in time since January 1, 1601 (UTC), but APIs such as
+/// `GetSystemTimes` use it for an elapsed duration, which is how it is used in this module.
+#[cfg(target_os = "windows")]
+fn filetime_to_seconds(ft: w::FILETIME) -> u64 {
+    const INTERVALS_PER_SECOND: u64 = 10_000_000;
+
+    let intervals = (u64::from(ft.dwHighDateTime) << 32) | u64::from(ft.dwLowDateTime);
+    intervals / INTERVALS_PER_SECOND
+}
diff --git a/metrics/src/server.rs b/metrics/src/server.rs
index 85f6f8ef..d44a8ba5 100644
--- a/metrics/src/server.rs
+++ b/metrics/src/server.rs
@@ -190,7 +190,7 @@ pub async fn prometheus_metrics(
     if let Err(error) = scrape_system_stats(metrics.clone_arc(), metrics_to_metrics_tx).await {
         warn!("Unable to scrape system stats: {error:?}");
     }
-
+    #[cfg(not(target_os = "windows"))]
     if let Err(error) = scrape_jemalloc_stats(&metrics) {
         warn!("Unable to scrape jemalloc stats: {error:?}");
     }
@@ -291,6 +291,7 @@ async fn scrape_system_stats(
     Ok(())
 }
 
+#[cfg(not(target_os = "windows"))]
 fn scrape_jemalloc_stats(metrics: &Arc) -> Result<()> {
     jemalloc_ctl::epoch::advance().map_err(AnyhowError::msg)?;
 