From 2adee25db9396f52b9ecdcbc776e59754109cbb2 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 22 May 2024 16:24:52 +0200 Subject: [PATCH 1/2] [d3d12] get `num_workgroups` builtin working for indirect dispatches --- CHANGELOG.md | 1 + tests/tests/dispatch_workgroups_indirect.rs | 8 +- wgpu-core/src/device/resource.rs | 3 +- wgpu-core/src/indirect_validation.rs | 30 +++-- wgpu-hal/src/dx12/command.rs | 12 +- wgpu-hal/src/dx12/device.rs | 134 ++++++++++++++++---- wgpu-hal/src/dx12/mod.rs | 6 + 7 files changed, 150 insertions(+), 44 deletions(-) diff --git a/CHANGELOG.md b/CHANGELOG.md index 4894b6a3ee..5fdb085842 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -185,6 +185,7 @@ By @bradwerth [#6216](https://github.com/gfx-rs/wgpu/pull/6216). #### DX12 - Replace `winapi` code to use the `windows` crate. By @MarijnS95 in [#5956](https://github.com/gfx-rs/wgpu/pull/5956) and [#6173](https://github.com/gfx-rs/wgpu/pull/6173) +- Get `num_workgroups` builtin working for indirect dispatches. By @teoxoy in [#5730](https://github.com/gfx-rs/wgpu/pull/5730) #### HAL diff --git a/tests/tests/dispatch_workgroups_indirect.rs b/tests/tests/dispatch_workgroups_indirect.rs index 7ba3d7f638..7729dd0312 100644 --- a/tests/tests/dispatch_workgroups_indirect.rs +++ b/tests/tests/dispatch_workgroups_indirect.rs @@ -1,4 +1,4 @@ -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext}; +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; /// Make sure that the num_workgroups builtin works properly (it requires a workaround on D3D12). #[gpu_test] @@ -12,8 +12,7 @@ static NUM_WORKGROUPS_BUILTIN: GpuTestConfiguration = GpuTestConfiguration::new( .limits(wgpu::Limits { max_push_constant_size: 4, ..wgpu::Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + }), ) .run_async(|ctx| async move { let num_workgroups = [1, 2, 3]; @@ -34,8 +33,7 @@ static DISCARD_DISPATCH: GpuTestConfiguration = GpuTestConfiguration::new() max_compute_workgroups_per_dimension: 10, max_push_constant_size: 4, ..wgpu::Limits::downlevel_defaults() - }) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + }), ) .run_async(|ctx| async move { let max = ctx.device.limits().max_compute_workgroups_per_dimension; diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 4ed9dff60a..521aaf8603 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2638,7 +2638,8 @@ impl Device { let hal_desc = hal::PipelineLayoutDescriptor { label: desc.label.to_hal(self.instance_flags), - flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE, + flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE + | hal::PipelineLayoutFlags::NUM_WORK_GROUPS, bind_group_layouts: &raw_bind_group_layouts, push_constant_ranges: desc.push_constant_ranges.as_ref(), }; diff --git a/wgpu-core/src/indirect_validation.rs b/wgpu-core/src/indirect_validation.rs index ca73731465..77d74d1c5d 100644 --- a/wgpu-core/src/indirect_validation.rs +++ b/wgpu-core/src/indirect_validation.rs @@ -1,3 +1,6 @@ +use std::mem::size_of; +use std::num::NonZeroU64; + use thiserror::Error; use crate::{ @@ -57,7 +60,7 @@ impl IndirectValidation { let src = format!( " @group(0) @binding(0) - var dst: array; + var dst: array; @group(1) @binding(0) var src: array; struct OffsetPc {{ @@ -74,14 +77,25 @@ impl IndirectValidation { src.y > max_compute_workgroups_per_dimension || src.z > max_compute_workgroups_per_dimension ) {{ - dst = array(0u, 0u, 0u); + dst = array(0u, 0u, 0u, 0u, 0u, 0u); }} else {{ - dst = array(src.x, src.y, src.z); + dst = array(src.x, src.y, src.z, src.x, src.y, src.z); }} }} " ); + // SAFETY: The value we are passing to `new_unchecked` is not zero, so this is safe. + const SRC_BUFFER_SIZE: NonZeroU64 = + unsafe { NonZeroU64::new_unchecked(size_of::() as u64 * 3) }; + + // SAFETY: The value we are passing to `new_unchecked` is not zero, so this is safe. + const DST_BUFFER_SIZE: NonZeroU64 = unsafe { + NonZeroU64::new_unchecked( + SRC_BUFFER_SIZE.get() * 2, // From above: `dst: array` + ) + }; + let module = naga::front::wgsl::parse_str(&src).map_err(|inner| { CreateShaderModuleError::Parsing(naga::error::ShaderError { source: src.clone(), @@ -133,7 +147,7 @@ impl IndirectValidation { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some(DST_BUFFER_SIZE), }, count: None, }], @@ -153,7 +167,7 @@ impl IndirectValidation { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: true }, has_dynamic_offset: true, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some(SRC_BUFFER_SIZE), }, count: None, }], @@ -211,7 +225,7 @@ impl IndirectValidation { let dst_buffer_desc = hal::BufferDescriptor { label: None, - size: 4 * 3, + size: DST_BUFFER_SIZE.get(), usage: hal::BufferUses::INDIRECT | hal::BufferUses::STORAGE_READ_WRITE, memory_flags: hal::MemoryFlags::empty(), }; @@ -229,7 +243,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer: dst_buffer.as_ref(), offset: 0, - size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + size: Some(DST_BUFFER_SIZE), }], samplers: &[], textures: &[], @@ -271,7 +285,7 @@ impl IndirectValidation { buffers: &[hal::BufferBinding { buffer, offset: 0, - size: Some(std::num::NonZeroU64::new(binding_size).unwrap()), + size: Some(NonZeroU64::new(binding_size).unwrap()), }], samplers: &[], textures: &[], diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 00f56cdb5f..ea059636e3 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1210,11 +1210,17 @@ impl crate::CommandEncoder for super::CommandEncoder { } unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { - self.prepare_dispatch([0; 3]); - //TODO: update special constants indirectly + self.update_root_elements(); + let cmd_signature = &self + .pass + .layout + .special_constants_cmd_signatures + .as_ref() + .unwrap_or_else(|| &self.shared.cmd_signatures) + .dispatch; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - &self.shared.cmd_signatures.dispatch, + cmd_signature, 1, &buffer.resource, offset, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 43425bb5f1..8cd4c6374f 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1,6 +1,6 @@ use std::{ ffi, - mem::{self, size_of}, + mem::{self, size_of, size_of_val}, num::NonZeroU32, ptr, sync::Arc, @@ -94,34 +94,12 @@ impl super::Device { let capacity_views = limits.max_non_sampler_bindings as u64; let capacity_samplers = 2_048; - fn create_command_signature( - raw: &Direct3D12::ID3D12Device, - byte_stride: usize, - arguments: &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC], - node_mask: u32, - ) -> Result { - let mut signature = None; - unsafe { - raw.CreateCommandSignature( - &Direct3D12::D3D12_COMMAND_SIGNATURE_DESC { - ByteStride: byte_stride as u32, - NumArgumentDescs: arguments.len() as u32, - pArgumentDescs: arguments.as_ptr(), - NodeMask: node_mask, - }, - None, - &mut signature, - ) - } - .into_device_result("Command signature creation")?; - signature.ok_or(crate::DeviceError::Unexpected) - } - let shared = super::DeviceShared { zero_buffer, cmd_signatures: super::CommandSignatures { - draw: create_command_signature( + draw: Self::create_command_signature( &raw, + None, size_of::(), &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, @@ -129,8 +107,9 @@ impl super::Device { }], 0, )?, - draw_indexed: create_command_signature( + draw_indexed: Self::create_command_signature( &raw, + None, size_of::(), &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, @@ -138,8 +117,9 @@ impl super::Device { }], 0, )?, - dispatch: create_command_signature( + dispatch: Self::create_command_signature( &raw, + None, size_of::(), &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, @@ -214,6 +194,30 @@ impl super::Device { }) } + fn create_command_signature( + raw: &Direct3D12::ID3D12Device, + root_signature: Option<&Direct3D12::ID3D12RootSignature>, + byte_stride: usize, + arguments: &[Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC], + node_mask: u32, + ) -> Result { + let mut signature = None; + unsafe { + raw.CreateCommandSignature( + &Direct3D12::D3D12_COMMAND_SIGNATURE_DESC { + ByteStride: byte_stride as u32, + NumArgumentDescs: arguments.len() as u32, + pArgumentDescs: arguments.as_ptr(), + NodeMask: node_mask, + }, + root_signature, + &mut signature, + ) + } + .into_device_result("Command signature creation")?; + signature.ok_or(crate::DeviceError::Unexpected) + } + // Blocks until the dedicated present queue is finished with all of its work. // // Once this method completes, the surface is able to be resized or deleted. @@ -1119,6 +1123,81 @@ impl crate::Device for super::Device { } .into_device_result("Root signature creation")?; + let special_constants_cmd_signatures = if let Some(root_index) = + special_constants_root_index + { + let constant_indirect_argument_desc = Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, + Anonymous: Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC_0 { + Constant: Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC_0_1 { + RootParameterIndex: root_index, + DestOffsetIn32BitValues: 0, + Num32BitValuesToSet: 3, + }, + }, + }; + let special_constant_buffer_args_len = { + // Hack: construct a dummy value of the special constants buffer value we need to + // fill, and calculate the size of each member. + let super::RootElement::SpecialConstantBuffer { + first_vertex, + first_instance, + other, + } = (super::RootElement::SpecialConstantBuffer { + first_vertex: 0, + first_instance: 0, + other: 0, + }) + else { + unreachable!(); + }; + size_of_val(&first_vertex) + size_of_val(&first_instance) + size_of_val(&other) + }; + Some(super::CommandSignatures { + draw: Self::create_command_signature( + &self.raw, + Some(&raw), + special_constant_buffer_args_len + size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW, + ..Default::default() + }, + ], + 0, + )?, + draw_indexed: Self::create_command_signature( + &self.raw, + Some(&raw), + special_constant_buffer_args_len + size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DRAW_INDEXED, + ..Default::default() + }, + ], + 0, + )?, + dispatch: Self::create_command_signature( + &self.raw, + Some(&raw), + special_constant_buffer_args_len + size_of::(), + &[ + constant_indirect_argument_desc, + Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { + Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_DISPATCH, + ..Default::default() + }, + ], + 0, + )?, + }) + } else { + None + }; + if let Some(label) = desc.label { unsafe { raw.SetName(&windows::core::HSTRING::from(label)) } .into_device_result("SetName")?; @@ -1131,6 +1210,7 @@ impl crate::Device for super::Device { signature: Some(raw), total_root_elements: parameters.len() as super::RootIndex, special_constants_root_index, + special_constants_cmd_signatures, root_constant_info, }, bind_group_infos, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 0efb418135..cf6af8359b 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -564,6 +564,7 @@ struct Idler { event: Event, } +#[derive(Debug, Clone)] struct CommandSignatures { draw: Direct3D12::ID3D12CommandSignature, draw_indexed: Direct3D12::ID3D12CommandSignature, @@ -636,8 +637,11 @@ enum RootElement { Empty, Constant, SpecialConstantBuffer { + /// The first vertex in an indirect draw call, _or_ the `x` of a compute dispatch. first_vertex: i32, + /// The first instance in an indirect draw call, _or_ the `y` of a compute dispatch. first_instance: u32, + /// Unused in an indirect draw call, _or_ the `z` of a compute dispatch. other: u32, }, /// Descriptor table. @@ -682,6 +686,7 @@ impl PassState { signature: None, total_root_elements: 0, special_constants_root_index: None, + special_constants_cmd_signatures: None, root_constant_info: None, }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], @@ -919,6 +924,7 @@ struct PipelineLayoutShared { signature: Option, total_root_elements: RootIndex, special_constants_root_index: Option, + special_constants_cmd_signatures: Option, root_constant_info: Option, } From 2cee8d27082dcd3d5d22924fd1256d152b334aed Mon Sep 17 00:00:00 2001 From: Erich Gubler Date: Tue, 3 Sep 2024 17:03:02 -0400 Subject: [PATCH 2/2] refactor(dx12): merge `PipelineLayoutShared::special_constants_{root_index,cmd_signatures}` into new `struct` --- wgpu-hal/src/dx12/command.rs | 21 +++++++++++++++++---- wgpu-hal/src/dx12/device.rs | 14 ++++++++------ wgpu-hal/src/dx12/mod.rs | 15 +++++++++++---- 3 files changed, 36 insertions(+), 14 deletions(-) diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index ea059636e3..09cf07775e 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -107,7 +107,13 @@ impl super::CommandEncoder { ); } } - if let Some(root_index) = self.pass.layout.special_constants_root_index { + if let Some(root_index) = self + .pass + .layout + .special_constants + .as_ref() + .map(|sc| sc.root_index) + { let needs_update = match self.pass.root_elements[root_index as usize] { super::RootElement::SpecialConstantBuffer { first_vertex: other_vertex, @@ -130,7 +136,13 @@ impl super::CommandEncoder { } fn prepare_dispatch(&mut self, count: [u32; 3]) { - if let Some(root_index) = self.pass.layout.special_constants_root_index { + if let Some(root_index) = self + .pass + .layout + .special_constants + .as_ref() + .map(|sc| sc.root_index) + { let needs_update = match self.pass.root_elements[root_index as usize] { super::RootElement::SpecialConstantBuffer { first_vertex, @@ -230,7 +242,7 @@ impl super::CommandEncoder { } fn reset_signature(&mut self, layout: &super::PipelineLayoutShared) { - if let Some(root_index) = layout.special_constants_root_index { + if let Some(root_index) = layout.special_constants.as_ref().map(|sc| sc.root_index) { self.pass.root_elements[root_index as usize] = super::RootElement::SpecialConstantBuffer { first_vertex: 0, @@ -1214,8 +1226,9 @@ impl crate::CommandEncoder for super::CommandEncoder { let cmd_signature = &self .pass .layout - .special_constants_cmd_signatures + .special_constants .as_ref() + .map(|sc| &sc.cmd_signatures) .unwrap_or_else(|| &self.shared.cmd_signatures) .dispatch; unsafe { diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 8cd4c6374f..171e2a36be 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1123,9 +1123,7 @@ impl crate::Device for super::Device { } .into_device_result("Root signature creation")?; - let special_constants_cmd_signatures = if let Some(root_index) = - special_constants_root_index - { + let special_constants = if let Some(root_index) = special_constants_root_index { let constant_indirect_argument_desc = Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC { Type: Direct3D12::D3D12_INDIRECT_ARGUMENT_TYPE_CONSTANT, Anonymous: Direct3D12::D3D12_INDIRECT_ARGUMENT_DESC_0 { @@ -1153,7 +1151,7 @@ impl crate::Device for super::Device { }; size_of_val(&first_vertex) + size_of_val(&first_instance) + size_of_val(&other) }; - Some(super::CommandSignatures { + let cmd_signatures = super::CommandSignatures { draw: Self::create_command_signature( &self.raw, Some(&raw), @@ -1193,6 +1191,11 @@ impl crate::Device for super::Device { ], 0, )?, + }; + + Some(super::PipelineLayoutSpecialConstants { + root_index, + cmd_signatures, }) } else { None @@ -1209,8 +1212,7 @@ impl crate::Device for super::Device { shared: super::PipelineLayoutShared { signature: Some(raw), total_root_elements: parameters.len() as super::RootIndex, - special_constants_root_index, - special_constants_cmd_signatures, + special_constants, root_constant_info, }, bind_group_infos, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index cf6af8359b..b67754bbb2 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -685,8 +685,7 @@ impl PassState { layout: PipelineLayoutShared { signature: None, total_root_elements: 0, - special_constants_root_index: None, - special_constants_cmd_signatures: None, + special_constants: None, root_constant_info: None, }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], @@ -923,14 +922,22 @@ struct RootConstantInfo { struct PipelineLayoutShared { signature: Option, total_root_elements: RootIndex, - special_constants_root_index: Option, - special_constants_cmd_signatures: Option, + special_constants: Option, root_constant_info: Option, } unsafe impl Send for PipelineLayoutShared {} unsafe impl Sync for PipelineLayoutShared {} +#[derive(Debug, Clone)] +struct PipelineLayoutSpecialConstants { + root_index: RootIndex, + cmd_signatures: CommandSignatures, +} + +unsafe impl Send for PipelineLayoutSpecialConstants {} +unsafe impl Sync for PipelineLayoutSpecialConstants {} + #[derive(Debug)] pub struct PipelineLayout { shared: PipelineLayoutShared,