From bdfb9d9b7ada941285c2c500c474a02a51a10354 Mon Sep 17 00:00:00 2001 From: Dzmitry Malyshau Date: Mon, 23 Sep 2024 22:36:04 -0700 Subject: [PATCH] vk: GPU timings --- blade-graphics/src/gles/command.rs | 6 ++ blade-graphics/src/lib.rs | 8 +++ blade-graphics/src/metal/command.rs | 6 +- blade-graphics/src/vulkan/command.rs | 85 ++++++++++++++++++++++++++-- blade-graphics/src/vulkan/init.rs | 22 ++++++- blade-graphics/src/vulkan/mod.rs | 39 +++++++++++-- examples/bunnymark/main.rs | 1 + examples/particle/main.rs | 19 ++++++- examples/particle/particle.rs | 4 ++ examples/ray-query/main.rs | 3 +- examples/scene/main.rs | 3 +- src/lib.rs | 1 + 12 files changed, 177 insertions(+), 20 deletions(-) diff --git a/blade-graphics/src/gles/command.rs b/blade-graphics/src/gles/command.rs index b17b65a9..25c1258b 100644 --- a/blade-graphics/src/gles/command.rs +++ b/blade-graphics/src/gles/command.rs @@ -1,3 +1,5 @@ +use std::time::Duration; + const COLOR_ATTACHMENTS: &[u32] = &[ glow::COLOR_ATTACHMENT0, glow::COLOR_ATTACHMENT1, @@ -201,6 +203,10 @@ impl super::CommandEncoder { limits: &self.limits, } } + + pub fn timings(&self) -> &[(String, Duration)] { + &[] + } } impl super::PassEncoder<'_, super::ComputePipeline> { diff --git a/blade-graphics/src/lib.rs b/blade-graphics/src/lib.rs index 02e973f2..d354aca1 100644 --- a/blade-graphics/src/lib.rs +++ b/blade-graphics/src/lib.rs @@ -72,9 +72,15 @@ mod shader; mod traits; pub mod util; pub mod limits { + /// Max number of passes inside a command encoder. + pub const PASS_COUNT: usize = 100; + /// Max plain data size for a pipeline. pub const PLAIN_DATA_SIZE: u32 = 256; + /// Max number of resources in a bind group. pub const RESOURCES_IN_GROUP: u32 = 8; + /// Min storage buffer alignment. pub const STORAGE_BUFFER_ALIGNMENT: u64 = 256; + /// Min acceleration structure scratch buffer alignment. pub const ACCELERATION_STRUCTURE_SCRATCH_ALIGNMENT: u64 = 256; } @@ -87,6 +93,8 @@ pub struct ContextDesc { /// Enable validation of the GAPI, shaders, /// and insert crash markers into command buffers. pub validation: bool, + /// Enable GPU timing of all passes. + pub timing: bool, /// Enable capture support with GAPI tools. pub capture: bool, /// Enable GAPI overlay. diff --git a/blade-graphics/src/metal/command.rs b/blade-graphics/src/metal/command.rs index 3c048fd7..174e4bc8 100644 --- a/blade-graphics/src/metal/command.rs +++ b/blade-graphics/src/metal/command.rs @@ -1,4 +1,4 @@ -use std::{marker::PhantomData, mem}; +use std::{marker::PhantomData, mem, time::Duration}; impl crate::ShaderBindable for T { fn bind_to(&self, ctx: &mut super::PipelineContext, index: u32) { @@ -226,6 +226,10 @@ impl super::CommandEncoder { phantom: PhantomData, } } + + pub fn timings(&self) -> &[(String, Duration)] { + &[] + } } #[hidden_trait::expose] diff --git a/blade-graphics/src/vulkan/command.rs b/blade-graphics/src/vulkan/command.rs index 82a40fd7..9de98774 100644 --- a/blade-graphics/src/vulkan/command.rs +++ b/blade-graphics/src/vulkan/command.rs @@ -1,5 +1,5 @@ use ash::vk; -use std::str; +use std::{str, time::Duration}; impl super::CrashHandler { fn add_marker(&mut self, marker: &str) -> u32 { @@ -198,7 +198,7 @@ fn map_render_target(rt: &crate::RenderTarget) -> vk::RenderingAttachmentInfo<'s } fn end_pass(device: &super::Device, cmd_buf: vk::CommandBuffer) { - if device.toggles.command_scopes { + if device.command_scope.is_some() { unsafe { device.debug_utils.cmd_end_debug_utils_label(cmd_buf); } @@ -225,10 +225,32 @@ impl super::CommandEncoder { } } + fn add_timestamp(&mut self, label: &str) { + if let Some(_) = self.device.timing { + let cmd_buf = self.buffers.first_mut().unwrap(); + if cmd_buf.timed_pass_names.len() == crate::limits::PASS_COUNT { + log::warn!("Reached the maximum for `limits::PASS_COUNT`, skipping the timer"); + return; + } + let index = cmd_buf.timed_pass_names.len() as u32; + unsafe { + self.device.core.cmd_write_timestamp( + cmd_buf.raw, + vk::PipelineStageFlags::TOP_OF_PIPE, + cmd_buf.query_pool, + index, + ); + } + cmd_buf.timed_pass_names.push(label.to_string()); + } + } + fn begin_pass(&mut self, label: &str) { self.barrier(); self.add_marker(label); - if self.device.toggles.command_scopes { + self.add_timestamp(label); + + if let Some(_) = self.device.command_scope { self.temp_label.clear(); self.temp_label.extend_from_slice(label.as_bytes()); self.temp_label.push(0); @@ -260,14 +282,61 @@ impl super::CommandEncoder { .begin_command_buffer(cmd_buf.raw, &vk_info) .unwrap(); } + + if let Some(ref timing) = self.device.timing { + self.timings.clear(); + if !cmd_buf.timed_pass_names.is_empty() { + let mut timestamps = [0u64; super::QUERY_POOL_SIZE]; + unsafe { + self.device + .core + .get_query_pool_results( + cmd_buf.query_pool, + 0, + &mut timestamps[..cmd_buf.timed_pass_names.len() + 1], + vk::QueryResultFlags::TYPE_64, + ) + .unwrap(); + } + let mut prev = timestamps[0]; + for (name, &ts) in cmd_buf + .timed_pass_names + .drain(..) + .zip(timestamps[1..].iter()) + { + let diff = (ts - prev) as f32 * timing.period; + prev = ts; + self.timings.push((name, Duration::from_nanos(diff as _))); + } + } + unsafe { + self.device.core.cmd_reset_query_pool( + cmd_buf.raw, + cmd_buf.query_pool, + 0, + super::QUERY_POOL_SIZE as u32, + ); + } + } } pub(super) fn finish(&mut self) -> vk::CommandBuffer { self.barrier(); self.add_marker("finish"); - let raw = self.buffers[0].raw; - unsafe { self.device.core.end_command_buffer(raw).unwrap() } - raw + let cmd_buf = self.buffers.first_mut().unwrap(); + unsafe { + if self.device.timing.is_some() { + let index = cmd_buf.timed_pass_names.len() as u32; + self.device.core.cmd_write_timestamp( + cmd_buf.raw, + vk::PipelineStageFlags::TOP_OF_PIPE, + cmd_buf.query_pool, + index, + ); + } + self.device.core.end_command_buffer(cmd_buf.raw).unwrap(); + } + cmd_buf.raw } fn barrier(&mut self) { @@ -477,6 +546,10 @@ impl super::CommandEncoder { Err(other) => panic!("GPU error {}", other), } } + + pub fn timings(&self) -> &[(String, Duration)] { + &self.timings + } } #[hidden_trait::expose] diff --git a/blade-graphics/src/vulkan/init.rs b/blade-graphics/src/vulkan/init.rs index 487cad90..9645caa9 100644 --- a/blade-graphics/src/vulkan/init.rs +++ b/blade-graphics/src/vulkan/init.rs @@ -44,6 +44,7 @@ struct AdapterCapabilities { buffer_marker: bool, shader_info: bool, full_screen_exclusive: bool, + timing: bool, bugs: SystemBugs, } @@ -202,6 +203,13 @@ unsafe fn inspect_adapter( return None; } + let timing = if properties.limits.timestamp_compute_and_graphics == vk::FALSE { + log::info!("No timing because of queue support"); + false + } else { + true + }; + let ray_tracing = if !supported_extensions.contains(&vk::KHR_ACCELERATION_STRUCTURE_NAME) || !supported_extensions.contains(&vk::KHR_RAY_QUERY_NAME) { @@ -269,6 +277,7 @@ unsafe fn inspect_adapter( buffer_marker, shader_info, full_screen_exclusive, + timing, bugs, }) } @@ -564,8 +573,17 @@ impl super::Context { }, core: device_core, device_information: capabilities.device_information, - toggles: super::Toggles { - command_scopes: desc.capture, + command_scope: if desc.capture { + Some(super::CommandScopeDevice {}) + } else { + None + }, + timing: if desc.timing && capabilities.timing { + Some(super::TimingDevice { + period: capabilities.properties.limits.timestamp_period, + }) + } else { + None }, //TODO: detect GPU family workarounds: super::Workarounds { diff --git a/blade-graphics/src/vulkan/mod.rs b/blade-graphics/src/vulkan/mod.rs index a0333070..136a8f5e 100644 --- a/blade-graphics/src/vulkan/mod.rs +++ b/blade-graphics/src/vulkan/mod.rs @@ -1,5 +1,5 @@ use ash::{khr, vk}; -use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex}; +use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex, time::Duration}; mod command; mod descriptor; @@ -7,6 +7,8 @@ mod init; mod pipeline; mod resource; +const QUERY_POOL_SIZE: usize = crate::limits::PASS_COUNT + 1; + struct Instance { core: ash::Instance, _debug_utils: ash::ext::debug_utils::Instance, @@ -21,8 +23,10 @@ struct RayTracingDevice { } #[derive(Clone, Default)] -struct Toggles { - command_scopes: bool, +struct CommandScopeDevice {} +#[derive(Clone, Default)] +struct TimingDevice { + period: f32, } #[derive(Clone)] @@ -43,7 +47,8 @@ struct Device { buffer_marker: Option, shader_info: Option, full_screen_exclusive: Option, - toggles: Toggles, + command_scope: Option, + timing: Option, workarounds: Workarounds, } @@ -223,6 +228,8 @@ pub struct RenderPipeline { struct CommandBuffer { raw: vk::CommandBuffer, descriptor_pool: descriptor::DescriptorPool, + query_pool: vk::QueryPool, + timed_pass_names: Vec, } #[derive(Debug, PartialEq)] @@ -246,6 +253,7 @@ pub struct CommandEncoder { present: Option, crash_handler: Option, temp_label: Vec, + timings: Vec<(String, Duration)>, } pub struct TransferCommandEncoder<'a> { raw: vk::CommandBuffer, @@ -345,9 +353,24 @@ impl crate::traits::CommandDevice for Context { self.set_object_name(raw, desc.name); }; let descriptor_pool = self.device.create_descriptor_pool(); + let query_pool = if self.device.timing.is_some() { + let query_pool_info = vk::QueryPoolCreateInfo::default() + .query_type(vk::QueryType::TIMESTAMP) + .query_count(QUERY_POOL_SIZE as u32); + unsafe { + self.device + .core + .create_query_pool(&query_pool_info, None) + .unwrap() + } + } else { + vk::QueryPool::null() + }; CommandBuffer { raw, descriptor_pool, + query_pool, + timed_pass_names: Vec::new(), } }) .collect(); @@ -375,6 +398,7 @@ impl crate::traits::CommandDevice for Context { present: None, crash_handler, temp_label: Vec::new(), + timings: Vec::new(), } } @@ -388,6 +412,13 @@ impl crate::traits::CommandDevice for Context { } self.device .destroy_descriptor_pool(&mut cmd_buf.descriptor_pool); + if self.device.timing.is_some() { + unsafe { + self.device + .core + .destroy_query_pool(cmd_buf.query_pool, None); + } + } } unsafe { self.device diff --git a/examples/bunnymark/main.rs b/examples/bunnymark/main.rs index 6b30f21b..ee93eb47 100644 --- a/examples/bunnymark/main.rs +++ b/examples/bunnymark/main.rs @@ -67,6 +67,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: false, capture: false, overlay: true, }, diff --git a/examples/particle/main.rs b/examples/particle/main.rs index da636dbc..8a2cba37 100644 --- a/examples/particle/main.rs +++ b/examples/particle/main.rs @@ -20,6 +20,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: true, capture: false, overlay: false, }, @@ -113,6 +114,19 @@ impl Example { } self.prev_sync_point = Some(sync_point); } + + fn add_gui(&mut self, ui: &mut egui::Ui) { + ui.heading("Particle System"); + self.particle_system.add_gui(ui); + ui.heading("Timings"); + for &(ref name, time) in self.command_encoder.timings() { + let millis = time.as_secs_f32() * 1000.0; + ui.horizontal(|ui| { + ui.label(name); + ui.colored_label(egui::Color32::WHITE, format!("{:.2} ms", millis)); + }); + } + } } fn main() { @@ -167,9 +181,8 @@ fn main() { winit::event::WindowEvent::RedrawRequested => { let raw_input = egui_winit.take_egui_input(&window); let egui_output = egui_winit.egui_ctx().run(raw_input, |egui_ctx| { - egui::SidePanel::left("my_side_panel").show(egui_ctx, |ui| { - ui.heading("Particle System"); - example.particle_system.add_gui(ui); + egui::SidePanel::left("info").show(egui_ctx, |ui| { + example.add_gui(ui); if ui.button("Quit").clicked() { target.exit(); } diff --git a/examples/particle/particle.rs b/examples/particle/particle.rs index 28f68b23..73145148 100644 --- a/examples/particle/particle.rs +++ b/examples/particle/particle.rs @@ -146,6 +146,10 @@ impl System { pub fn destroy(&mut self, context: &gpu::Context) { context.destroy_buffer(self.particle_buf); context.destroy_buffer(self.free_list_buf); + context.destroy_compute_pipeline(&mut self.reset_pipeline); + context.destroy_compute_pipeline(&mut self.emit_pipeline); + context.destroy_compute_pipeline(&mut self.update_pipeline); + context.destroy_render_pipeline(&mut self.draw_pipeline); } fn main_data(&self) -> MainData { diff --git a/examples/ray-query/main.rs b/examples/ray-query/main.rs index d3283769..1e16092f 100644 --- a/examples/ray-query/main.rs +++ b/examples/ray-query/main.rs @@ -51,8 +51,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), - capture: false, - overlay: false, + ..Default::default() }, ) .unwrap() diff --git a/examples/scene/main.rs b/examples/scene/main.rs index 5f8b9b99..11437619 100644 --- a/examples/scene/main.rs +++ b/examples/scene/main.rs @@ -191,8 +191,7 @@ impl Example { window, gpu::ContextDesc { validation: cfg!(debug_assertions), - capture: false, - overlay: false, + ..Default::default() }, ) .unwrap() diff --git a/src/lib.rs b/src/lib.rs index d9bdd5a0..ea293d2b 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -415,6 +415,7 @@ impl Engine { window, gpu::ContextDesc { validation: cfg!(debug_assertions), + timing: true, capture: false, overlay: false, },