Skip to content

Commit

Permalink
vk: GPU timings
Browse files Browse the repository at this point in the history
  • Loading branch information
kvark committed Sep 24, 2024
1 parent a4215a2 commit bdfb9d9
Show file tree
Hide file tree
Showing 12 changed files with 177 additions and 20 deletions.
6 changes: 6 additions & 0 deletions blade-graphics/src/gles/command.rs
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
use std::time::Duration;

const COLOR_ATTACHMENTS: &[u32] = &[
glow::COLOR_ATTACHMENT0,
glow::COLOR_ATTACHMENT1,
Expand Down Expand Up @@ -201,6 +203,10 @@ impl super::CommandEncoder {
limits: &self.limits,
}
}

/// Returns GPU pass timings as `(label, duration)` pairs.
///
/// This implementation always returns an empty slice.
pub fn timings(&self) -> &[(String, Duration)] {
&[]
}
}

impl super::PassEncoder<'_, super::ComputePipeline> {
Expand Down
8 changes: 8 additions & 0 deletions blade-graphics/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -72,9 +72,15 @@ mod shader;
mod traits;
pub mod util;
/// Fixed limits and minimum alignments exposed as crate-wide constants.
pub mod limits {
/// Max number of passes inside a command encoder.
// The Vulkan backend sizes its timestamp query pool as `PASS_COUNT + 1`
// and skips timing (with a warning) for passes beyond this count.
pub const PASS_COUNT: usize = 100;
/// Max plain data size for a pipeline.
pub const PLAIN_DATA_SIZE: u32 = 256;
/// Max number of resources in a bind group.
pub const RESOURCES_IN_GROUP: u32 = 8;
/// Min storage buffer alignment.
pub const STORAGE_BUFFER_ALIGNMENT: u64 = 256;
/// Min acceleration structure scratch buffer alignment.
pub const ACCELERATION_STRUCTURE_SCRATCH_ALIGNMENT: u64 = 256;
}

Expand All @@ -87,6 +93,8 @@ pub struct ContextDesc {
/// Enable validation of the GAPI, shaders,
/// and insert crash markers into command buffers.
pub validation: bool,
/// Enable GPU timing of all passes.
pub timing: bool,
/// Enable capture support with GAPI tools.
pub capture: bool,
/// Enable GAPI overlay.
Expand Down
6 changes: 5 additions & 1 deletion blade-graphics/src/metal/command.rs
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
use std::{marker::PhantomData, mem};
use std::{marker::PhantomData, mem, time::Duration};

impl<T: bytemuck::Pod> crate::ShaderBindable for T {
fn bind_to(&self, ctx: &mut super::PipelineContext, index: u32) {
Expand Down Expand Up @@ -226,6 +226,10 @@ impl super::CommandEncoder {
phantom: PhantomData,
}
}

/// Returns GPU pass timings as `(label, duration)` pairs.
///
/// This implementation always returns an empty slice.
pub fn timings(&self) -> &[(String, Duration)] {
&[]
}
}

#[hidden_trait::expose]
Expand Down
85 changes: 79 additions & 6 deletions blade-graphics/src/vulkan/command.rs
Original file line number Diff line number Diff line change
@@ -1,5 +1,5 @@
use ash::vk;
use std::str;
use std::{str, time::Duration};

impl super::CrashHandler {
fn add_marker(&mut self, marker: &str) -> u32 {
Expand Down Expand Up @@ -198,7 +198,7 @@ fn map_render_target(rt: &crate::RenderTarget) -> vk::RenderingAttachmentInfo<'s
}

fn end_pass(device: &super::Device, cmd_buf: vk::CommandBuffer) {
if device.toggles.command_scopes {
if device.command_scope.is_some() {
unsafe {
device.debug_utils.cmd_end_debug_utils_label(cmd_buf);
}
Expand All @@ -225,10 +225,32 @@ impl super::CommandEncoder {
}
}

/// Writes a GPU timestamp for the pass named `label` into the first command
/// buffer's query pool and remembers the label.
///
/// Does nothing when the device has no timing support enabled. When
/// `limits::PASS_COUNT` labels are already recorded, a warning is logged and
/// the pass is left untimed.
fn add_timestamp(&mut self, label: &str) {
    if self.device.timing.is_none() {
        return;
    }

    let active = self.buffers.first_mut().unwrap();
    let slot = active.timed_pass_names.len();
    if slot == crate::limits::PASS_COUNT {
        log::warn!("Reached the maximum for `limits::PASS_COUNT`, skipping the timer");
        return;
    }

    // The query index equals the number of passes timed so far.
    unsafe {
        self.device.core.cmd_write_timestamp(
            active.raw,
            vk::PipelineStageFlags::TOP_OF_PIPE,
            active.query_pool,
            slot as u32,
        );
    }
    active.timed_pass_names.push(label.to_owned());
}

fn begin_pass(&mut self, label: &str) {
self.barrier();
self.add_marker(label);
if self.device.toggles.command_scopes {
self.add_timestamp(label);

if let Some(_) = self.device.command_scope {
self.temp_label.clear();
self.temp_label.extend_from_slice(label.as_bytes());
self.temp_label.push(0);
Expand Down Expand Up @@ -260,14 +282,61 @@ impl super::CommandEncoder {
.begin_command_buffer(cmd_buf.raw, &vk_info)
.unwrap();
}

if let Some(ref timing) = self.device.timing {
self.timings.clear();
if !cmd_buf.timed_pass_names.is_empty() {
let mut timestamps = [0u64; super::QUERY_POOL_SIZE];
unsafe {
self.device
.core
.get_query_pool_results(
cmd_buf.query_pool,
0,
&mut timestamps[..cmd_buf.timed_pass_names.len() + 1],
vk::QueryResultFlags::TYPE_64,
)
.unwrap();
}
let mut prev = timestamps[0];
for (name, &ts) in cmd_buf
.timed_pass_names
.drain(..)
.zip(timestamps[1..].iter())
{
let diff = (ts - prev) as f32 * timing.period;
prev = ts;
self.timings.push((name, Duration::from_nanos(diff as _)));
}
}
unsafe {
self.device.core.cmd_reset_query_pool(
cmd_buf.raw,
cmd_buf.query_pool,
0,
super::QUERY_POOL_SIZE as u32,
);
}
}
}

/// Ends recording of the first command buffer and returns its raw handle.
///
/// Calls `barrier()` and `add_marker("finish")` first. When timing is enabled
/// on the device, one extra timestamp is written before the command buffer
/// is ended.
pub(super) fn finish(&mut self) -> vk::CommandBuffer {
self.barrier();
self.add_marker("finish");
let raw = self.buffers[0].raw;
unsafe { self.device.core.end_command_buffer(raw).unwrap() }
raw
let cmd_buf = self.buffers.first_mut().unwrap();
unsafe {
if self.device.timing.is_some() {
// Closing timestamp: it lands in the slot right after the per-pass
// ones, so that each pass can be measured as the difference between
// two consecutive queries.
let index = cmd_buf.timed_pass_names.len() as u32;
self.device.core.cmd_write_timestamp(
cmd_buf.raw,
vk::PipelineStageFlags::TOP_OF_PIPE,
cmd_buf.query_pool,
index,
);
}
self.device.core.end_command_buffer(cmd_buf.raw).unwrap();
}
cmd_buf.raw
}

fn barrier(&mut self) {
Expand Down Expand Up @@ -477,6 +546,10 @@ impl super::CommandEncoder {
Err(other) => panic!("GPU error {}", other),
}
}

/// Returns the `(pass name, duration)` pairs currently stored in this encoder.
// NOTE(review): the list appears to be rebuilt from query pool results when
// a command buffer is started with timing enabled — confirm against the
// full `start` implementation.
pub fn timings(&self) -> &[(String, Duration)] {
&self.timings
}
}

#[hidden_trait::expose]
Expand Down
22 changes: 20 additions & 2 deletions blade-graphics/src/vulkan/init.rs
Original file line number Diff line number Diff line change
Expand Up @@ -44,6 +44,7 @@ struct AdapterCapabilities {
buffer_marker: bool,
shader_info: bool,
full_screen_exclusive: bool,
timing: bool,
bugs: SystemBugs,
}

Expand Down Expand Up @@ -202,6 +203,13 @@ unsafe fn inspect_adapter(
return None;
}

let timing = if properties.limits.timestamp_compute_and_graphics == vk::FALSE {
log::info!("No timing because of queue support");
false
} else {
true
};

let ray_tracing = if !supported_extensions.contains(&vk::KHR_ACCELERATION_STRUCTURE_NAME)
|| !supported_extensions.contains(&vk::KHR_RAY_QUERY_NAME)
{
Expand Down Expand Up @@ -269,6 +277,7 @@ unsafe fn inspect_adapter(
buffer_marker,
shader_info,
full_screen_exclusive,
timing,
bugs,
})
}
Expand Down Expand Up @@ -564,8 +573,17 @@ impl super::Context {
},
core: device_core,
device_information: capabilities.device_information,
toggles: super::Toggles {
command_scopes: desc.capture,
command_scope: if desc.capture {
Some(super::CommandScopeDevice {})
} else {
None
},
timing: if desc.timing && capabilities.timing {
Some(super::TimingDevice {
period: capabilities.properties.limits.timestamp_period,
})
} else {
None
},
//TODO: detect GPU family
workarounds: super::Workarounds {
Expand Down
39 changes: 35 additions & 4 deletions blade-graphics/src/vulkan/mod.rs
Original file line number Diff line number Diff line change
@@ -1,12 +1,14 @@
use ash::{khr, vk};
use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex};
use std::{num::NonZeroU32, path::PathBuf, ptr, sync::Mutex, time::Duration};

mod command;
mod descriptor;
mod init;
mod pipeline;
mod resource;

const QUERY_POOL_SIZE: usize = crate::limits::PASS_COUNT + 1;

struct Instance {
core: ash::Instance,
_debug_utils: ash::ext::debug_utils::Instance,
Expand All @@ -21,8 +23,10 @@ struct RayTracingDevice {
}

#[derive(Clone, Default)]
struct Toggles {
command_scopes: bool,
struct CommandScopeDevice {}
/// Device-level GPU timing state; stored as `Option<TimingDevice>` on `Device`.
#[derive(Clone, Default)]
struct TimingDevice {
/// Timestamp period taken from the adapter's `timestamp_period` limit.
/// A timestamp tick difference is multiplied by it to get nanoseconds.
period: f32,
}

#[derive(Clone)]
Expand All @@ -43,7 +47,8 @@ struct Device {
buffer_marker: Option<ash::amd::buffer_marker::Device>,
shader_info: Option<ash::amd::shader_info::Device>,
full_screen_exclusive: Option<ash::ext::full_screen_exclusive::Device>,
toggles: Toggles,
command_scope: Option<CommandScopeDevice>,
timing: Option<TimingDevice>,
workarounds: Workarounds,
}

Expand Down Expand Up @@ -223,6 +228,8 @@ pub struct RenderPipeline {
/// Per-command-buffer state owned by a command encoder.
struct CommandBuffer {
/// Raw Vulkan command buffer handle.
raw: vk::CommandBuffer,
/// Descriptor pool created alongside this command buffer.
descriptor_pool: descriptor::DescriptorPool,
/// Timestamp query pool; a null handle when timing is disabled on the device.
query_pool: vk::QueryPool,
/// Labels of the passes that had a timestamp written, in recording order.
timed_pass_names: Vec<String>,
}

#[derive(Debug, PartialEq)]
Expand All @@ -246,6 +253,7 @@ pub struct CommandEncoder {
present: Option<Presentation>,
crash_handler: Option<CrashHandler>,
temp_label: Vec<u8>,
timings: Vec<(String, Duration)>,
}
pub struct TransferCommandEncoder<'a> {
raw: vk::CommandBuffer,
Expand Down Expand Up @@ -345,9 +353,24 @@ impl crate::traits::CommandDevice for Context {
self.set_object_name(raw, desc.name);
};
let descriptor_pool = self.device.create_descriptor_pool();
let query_pool = if self.device.timing.is_some() {
let query_pool_info = vk::QueryPoolCreateInfo::default()
.query_type(vk::QueryType::TIMESTAMP)
.query_count(QUERY_POOL_SIZE as u32);
unsafe {
self.device
.core
.create_query_pool(&query_pool_info, None)
.unwrap()
}
} else {
vk::QueryPool::null()
};
CommandBuffer {
raw,
descriptor_pool,
query_pool,
timed_pass_names: Vec::new(),
}
})
.collect();
Expand Down Expand Up @@ -375,6 +398,7 @@ impl crate::traits::CommandDevice for Context {
present: None,
crash_handler,
temp_label: Vec::new(),
timings: Vec::new(),
}
}

Expand All @@ -388,6 +412,13 @@ impl crate::traits::CommandDevice for Context {
}
self.device
.destroy_descriptor_pool(&mut cmd_buf.descriptor_pool);
if self.device.timing.is_some() {
unsafe {
self.device
.core
.destroy_query_pool(cmd_buf.query_pool, None);
}
}
}
unsafe {
self.device
Expand Down
1 change: 1 addition & 0 deletions examples/bunnymark/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,7 @@ impl Example {
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
timing: false,
capture: false,
overlay: true,
},
Expand Down
19 changes: 16 additions & 3 deletions examples/particle/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@ impl Example {
window,
gpu::ContextDesc {
validation: cfg!(debug_assertions),
timing: true,
capture: false,
overlay: false,
},
Expand Down Expand Up @@ -113,6 +114,19 @@ impl Example {
}
self.prev_sync_point = Some(sync_point);
}

/// Builds the example's side-panel UI: the particle system controls followed
/// by one row per GPU pass timing reported by the command encoder.
fn add_gui(&mut self, ui: &mut egui::Ui) {
    ui.heading("Particle System");
    self.particle_system.add_gui(ui);

    ui.heading("Timings");
    for (pass_name, elapsed) in self.command_encoder.timings() {
        // Show the duration in milliseconds.
        let elapsed_ms = elapsed.as_secs_f32() * 1000.0;
        ui.horizontal(|row| {
            row.label(pass_name);
            row.colored_label(egui::Color32::WHITE, format!("{:.2} ms", elapsed_ms));
        });
    }
}
}

fn main() {
Expand Down Expand Up @@ -167,9 +181,8 @@ fn main() {
winit::event::WindowEvent::RedrawRequested => {
let raw_input = egui_winit.take_egui_input(&window);
let egui_output = egui_winit.egui_ctx().run(raw_input, |egui_ctx| {
egui::SidePanel::left("my_side_panel").show(egui_ctx, |ui| {
ui.heading("Particle System");
example.particle_system.add_gui(ui);
egui::SidePanel::left("info").show(egui_ctx, |ui| {
example.add_gui(ui);
if ui.button("Quit").clicked() {
target.exit();
}
Expand Down
4 changes: 4 additions & 0 deletions examples/particle/particle.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,10 @@ impl System {
/// Releases the GPU resources held by this system through `context`:
/// the particle and free-list buffers, the three compute pipelines,
/// and the draw pipeline.
pub fn destroy(&mut self, context: &gpu::Context) {
context.destroy_buffer(self.particle_buf);
context.destroy_buffer(self.free_list_buf);
context.destroy_compute_pipeline(&mut self.reset_pipeline);
context.destroy_compute_pipeline(&mut self.emit_pipeline);
context.destroy_compute_pipeline(&mut self.update_pipeline);
context.destroy_render_pipeline(&mut self.draw_pipeline);
}

fn main_data(&self) -> MainData {
Expand Down
Loading

0 comments on commit bdfb9d9

Please sign in to comment.