Skip to content

Commit

Permalink
Metal encoder & pass timestamp support (gfx-rs#4008)
Browse files Browse the repository at this point in the history
Implements timer queries via write_timestamp on Metal for encoders (whenever timer queries are available) and passes (for Intel/AMD GPUs, where we should advertise TIMESTAMP_QUERY_INSIDE_PASSES now).

Due to some bugs in Metal this was a lot harder than expected. I believe the solution is close to optimal with the current restrictions in place. For details see code comments.
  • Loading branch information
Wumpf authored and bradwerth committed Sep 19, 2023
1 parent 2f5dc46 commit 2003dd9
Show file tree
Hide file tree
Showing 9 changed files with 260 additions and 61 deletions.
1 change: 1 addition & 0 deletions .deny.toml
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@ allow = [
[sources]
allow-git = [
"https://github.com/grovesNL/glow",
"https://github.com/gfx-rs/metal-rs",
]
unknown-registry = "deny"
unknown-git = "deny"
Expand Down
4 changes: 4 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -88,6 +88,10 @@ By @Valaphee in [#3402](https://github.com/gfx-rs/wgpu/pull/3402)
### Documentation
- Use WGSL for VertexFormat example types. By @ScanMountGoat in [#4305](https://github.com/gfx-rs/wgpu/pull/4035)

#### Metal

- Support for timestamp queries on encoders and passes. By @wumpf in [#4008](https://github.com/gfx-rs/wgpu/pull/4008)

### Bug Fixes

#### General
Expand Down
3 changes: 1 addition & 2 deletions Cargo.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

2 changes: 2 additions & 0 deletions Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -158,6 +158,8 @@ termcolor = "1.2.0"
#glow = { path = "../glow" }
#d3d12 = { path = "../d3d12-rs" }
#metal = { path = "../metal-rs" }
#metal = { path = "../metal-rs" }
metal = { git = "https://github.com/gfx-rs/metal-rs/", rev = "d24f1a4" } # More timer support via https://github.com/gfx-rs/metal-rs/pull/280
#web-sys = { path = "../wasm-bindgen/crates/web-sys" }
#js-sys = { path = "../wasm-bindgen/crates/js-sys" }
#wasm-bindgen = { path = "../wasm-bindgen" }
Expand Down
18 changes: 8 additions & 10 deletions examples/timestamp-queries/src/main.rs
Original file line number Diff line number Diff line change
Expand Up @@ -47,6 +47,7 @@ impl QueryResults {
// * compute end
const NUM_QUERIES: u64 = 8;

#[allow(clippy::redundant_closure)] // False positive
fn from_raw_results(timestamps: Vec<u64>, timestamps_inside_passes: bool) -> Self {
assert_eq!(timestamps.len(), Self::NUM_QUERIES as usize);

Expand All @@ -60,9 +61,9 @@ impl QueryResults {
let mut encoder_timestamps = [0, 0];
encoder_timestamps[0] = get_next_slot();
let render_start_end_timestamps = [get_next_slot(), get_next_slot()];
let render_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot());
let render_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
let compute_start_end_timestamps = [get_next_slot(), get_next_slot()];
let compute_inside_timestamp = timestamps_inside_passes.then_some(get_next_slot());
let compute_inside_timestamp = timestamps_inside_passes.then(|| get_next_slot());
encoder_timestamps[1] = get_next_slot();

QueryResults {
Expand All @@ -79,8 +80,8 @@ impl QueryResults {
let elapsed_us = |start, end: u64| end.wrapping_sub(start) as f64 * period as f64 / 1000.0;

println!(
"Elapsed time render + compute: {:.2} μs",
elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1])
"Elapsed time before render until after compute: {:.2} μs",
elapsed_us(self.encoder_timestamps[0], self.encoder_timestamps[1]),
);
println!(
"Elapsed time render pass: {:.2} μs",
Expand Down Expand Up @@ -464,13 +465,10 @@ mod tests {
render_start_end_timestamps[1].wrapping_sub(render_start_end_timestamps[0]);
let compute_delta =
compute_start_end_timestamps[1].wrapping_sub(compute_start_end_timestamps[0]);
let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);

// TODO: Metal encoder timestamps aren't implemented yet.
if ctx.adapter.get_info().backend != wgpu::Backend::Metal {
let encoder_delta = encoder_timestamps[1].wrapping_sub(encoder_timestamps[0]);
assert!(encoder_delta > 0);
assert!(encoder_delta >= render_delta + compute_delta);
}
assert!(encoder_delta > 0);
assert!(encoder_delta >= render_delta + compute_delta);

if let Some(render_inside_timestamp) = render_inside_timestamp {
assert!(render_inside_timestamp >= render_start_end_timestamps[0]);
Expand Down
46 changes: 33 additions & 13 deletions wgpu-hal/src/metal/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,8 @@ use wgt::{AstcBlock, AstcChannel};

use std::{sync::Arc, thread};

use super::TimestampQuerySupport;

const MAX_COMMAND_BUFFERS: u64 = 2048;

unsafe impl Send for super::Adapter {}
Expand Down Expand Up @@ -536,6 +538,26 @@ impl super::PrivateCapabilities {
MTLReadWriteTextureTier::TierNone
};

let mut timestamp_query_support = TimestampQuerySupport::empty();
if version.at_least((11, 0), (14, 0), os_is_mac)
&& device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary)
{
// If we don't support at stage boundary, don't support anything else.
timestamp_query_support.insert(TimestampQuerySupport::STAGE_BOUNDARIES);

if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary) {
timestamp_query_support.insert(TimestampQuerySupport::ON_RENDER_ENCODER);
}
if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary)
{
timestamp_query_support.insert(TimestampQuerySupport::ON_COMPUTE_ENCODER);
}
if device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtBlitBoundary) {
timestamp_query_support.insert(TimestampQuerySupport::ON_BLIT_ENCODER);
}
// `TimestampQuerySupport::INSIDE_WGPU_PASSES` emerges from the other flags.
}

Self {
family_check,
msl_version: if os_is_xr || version.at_least((12, 0), (15, 0), os_is_mac) {
Expand Down Expand Up @@ -773,13 +795,7 @@ impl super::PrivateCapabilities {
} else {
None
},
support_timestamp_query: version.at_least((11, 0), (14, 0), os_is_mac)
&& device
.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtStageBoundary),
support_timestamp_query_in_passes: version.at_least((11, 0), (14, 0), os_is_mac)
&& device.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDrawBoundary)
&& device
.supports_counter_sampling(metal::MTLCounterSamplingPoint::AtDispatchBoundary),
timestamp_query_support,
}
}

Expand Down Expand Up @@ -807,12 +823,16 @@ impl super::PrivateCapabilities {
| F::DEPTH32FLOAT_STENCIL8
| F::MULTI_DRAW_INDIRECT;

features.set(F::TIMESTAMP_QUERY, self.support_timestamp_query);
// TODO: Not yet implemented.
// features.set(
// F::TIMESTAMP_QUERY_INSIDE_PASSES,
// self.support_timestamp_query_in_passes,
// );
features.set(
F::TIMESTAMP_QUERY,
self.timestamp_query_support
.contains(TimestampQuerySupport::STAGE_BOUNDARIES),
);
features.set(
F::TIMESTAMP_QUERY_INSIDE_PASSES,
self.timestamp_query_support
.contains(TimestampQuerySupport::INSIDE_WGPU_PASSES),
);
features.set(F::TEXTURE_COMPRESSION_ASTC, self.format_astc);
features.set(F::TEXTURE_COMPRESSION_ASTC_HDR, self.format_astc_hdr);
features.set(F::TEXTURE_COMPRESSION_BC, self.format_bc);
Expand Down
Loading

0 comments on commit 2003dd9

Please sign in to comment.