From 59891d94fb206cb11818f00b43676303758f1870 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Wed, 22 May 2024 16:24:52 +0200 Subject: [PATCH] [d3d12] get `num_workgroups` builtin working for indirect dispatches --- tests/tests/dispatch_workgroups_indirect.rs | 5 +-- wgpu-core/src/command/compute.rs | 4 +- wgpu-core/src/device/global.rs | 14 +++++-- wgpu-core/src/device/resource.rs | 3 +- wgpu-hal/src/dx12/command.rs | 12 ++++-- wgpu-hal/src/dx12/device.rs | 45 +++++++++++++++++++++ wgpu-hal/src/dx12/mod.rs | 3 ++ 7 files changed, 74 insertions(+), 12 deletions(-) diff --git a/tests/tests/dispatch_workgroups_indirect.rs b/tests/tests/dispatch_workgroups_indirect.rs index 4a49cec8f3a..a6114751aae 100644 --- a/tests/tests/dispatch_workgroups_indirect.rs +++ b/tests/tests/dispatch_workgroups_indirect.rs @@ -1,4 +1,4 @@ -use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext}; +use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext}; /// Make sure that the num_workgroups builtin works properly (it requires a workaround on D3D12). #[gpu_test] @@ -8,8 +8,7 @@ static NUM_WORKGROUPS_BUILTIN: GpuTestConfiguration = GpuTestConfiguration::new( .downlevel_flags( wgpu::DownlevelFlags::COMPUTE_SHADERS | wgpu::DownlevelFlags::INDIRECT_EXECUTION, ) - .limits(wgpu::Limits::downlevel_defaults()) - .expect_fail(FailureCase::backend(wgt::Backends::DX12)), + .limits(wgpu::Limits::downlevel_defaults()), ) .run_async(|ctx| async move { let num_workgroups = [1, 2, 3]; diff --git a/wgpu-core/src/command/compute.rs b/wgpu-core/src/command/compute.rs index 820184e9a05..391d4dd7c75 100644 --- a/wgpu-core/src/command/compute.rs +++ b/wgpu-core/src/command/compute.rs @@ -920,7 +920,7 @@ impl Global { device_id, &crate::resource::BufferDescriptor { label: None, - size: 4 * 3, + size: 4 * 3 * 2, usage: wgt::BufferUsages::INDIRECT | wgt::BufferUsages::STORAGE, mapped_at_creation: false, }, @@ -956,7 +956,7 @@ impl Global { buffer_id: dst_buffer_id, offset: 0, size: Some( - std::num::NonZeroU64::new(4 * 3).unwrap(), + std::num::NonZeroU64::new(4 * 3 * 2).unwrap(), ), }, ), diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 57638910ac2..5ced4f1ef1f 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1798,11 +1798,17 @@ impl Global { @group(0) @binding(0) var src: vec3; @group(0) @binding(1) - var dst: vec3; + var dst: array; @compute @workgroup_size(1) fn main() {{ - dst = select(src, vec3(), src > vec3({max_compute_workgroups_per_dimension}u)); + let res = select(src, vec3(), src > vec3({max_compute_workgroups_per_dimension}u)); + dst[0] = res.x; + dst[1] = res.y; + dst[2] = res.z; + dst[3] = res.x; + dst[4] = res.y; + dst[5] = res.z; }} "); @@ -1840,7 +1846,9 @@ impl Global { ty: wgt::BindingType::Buffer { ty: wgt::BufferBindingType::Storage { read_only: false }, has_dynamic_offset: false, - min_binding_size: Some(std::num::NonZeroU64::new(4 * 3).unwrap()), + min_binding_size: Some( + std::num::NonZeroU64::new(4 * 3 * 2).unwrap(), + ), }, count: None, }, diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index 56965a61ac7..6d10fb24782 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2681,7 +2681,8 @@ impl Device { let hal_desc = hal::PipelineLayoutDescriptor { label: desc.label.to_hal(self.instance_flags), - flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE, + flags: hal::PipelineLayoutFlags::FIRST_VERTEX_INSTANCE + | hal::PipelineLayoutFlags::NUM_WORK_GROUPS, bind_group_layouts: &raw_bind_group_layouts, push_constant_ranges: desc.push_constant_ranges.as_ref(), }; diff --git a/wgpu-hal/src/dx12/command.rs b/wgpu-hal/src/dx12/command.rs index 3c535b22347..87f8a5f9612 100644 --- a/wgpu-hal/src/dx12/command.rs +++ b/wgpu-hal/src/dx12/command.rs @@ -1193,11 +1193,17 @@ impl crate::CommandEncoder for super::CommandEncoder { self.list.as_ref().unwrap().dispatch(count); } unsafe fn dispatch_indirect(&mut self, buffer: &super::Buffer, offset: wgt::BufferAddress) { - self.prepare_dispatch([0; 3]); - //TODO: update special constants indirectly + self.update_root_elements(); + let cmd_signature = if let Some(cmd_signatures) = + self.pass.layout.special_constants_cmd_signatures.as_mut() + { + cmd_signatures.dispatch.as_mut_ptr() + } else { + self.shared.cmd_signatures.dispatch.as_mut_ptr() + }; unsafe { self.list.as_ref().unwrap().ExecuteIndirect( - self.shared.cmd_signatures.dispatch.as_mut_ptr(), + cmd_signature, 1, buffer.resource.as_mut_ptr(), offset, diff --git a/wgpu-hal/src/dx12/device.rs b/wgpu-hal/src/dx12/device.rs index 5625dfca3b6..b687973b361 100644 --- a/wgpu-hal/src/dx12/device.rs +++ b/wgpu-hal/src/dx12/device.rs @@ -1060,6 +1060,50 @@ impl crate::Device for super::Device { .create_root_signature(blob, 0) .into_device_result("Root signature creation")?; + let special_constants_cmd_signatures = + if let Some(root_index) = special_constants_root_index { + Some(super::CommandSignatures { + draw: self + .raw + .create_command_signature( + raw.clone(), + &[ + d3d12::IndirectArgument::constant(root_index, 0, 3), + d3d12::IndirectArgument::draw(), + ], + 12 + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw) signature creation")?, + draw_indexed: self + .raw + .create_command_signature( + raw.clone(), + &[ + d3d12::IndirectArgument::constant(root_index, 0, 3), + d3d12::IndirectArgument::draw_indexed(), + ], + 12 + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (draw_indexed) signature creation")?, + dispatch: self + .raw + .create_command_signature( + raw.clone(), + &[ + d3d12::IndirectArgument::constant(root_index, 0, 3), + d3d12::IndirectArgument::dispatch(), + ], + 12 + mem::size_of::() as u32, + 0, + ) + .into_device_result("Command (dispatch) signature creation")?, + }) + } else { + None + }; + log::debug!("\traw = {:?}", raw); if let Some(label) = desc.label { @@ -1072,6 +1116,7 @@ impl crate::Device for super::Device { signature: raw, total_root_elements: parameters.len() as super::RootIndex, special_constants_root_index, + special_constants_cmd_signatures, root_constant_info, }, bind_group_infos, diff --git a/wgpu-hal/src/dx12/mod.rs b/wgpu-hal/src/dx12/mod.rs index 99800e87c91..b30a0b33d41 100644 --- a/wgpu-hal/src/dx12/mod.rs +++ b/wgpu-hal/src/dx12/mod.rs @@ -227,6 +227,7 @@ struct Idler { event: d3d12::Event, } +#[derive(Debug, Clone)] struct CommandSignatures { draw: d3d12::CommandSignature, draw_indexed: d3d12::CommandSignature, @@ -344,6 +345,7 @@ impl PassState { signature: d3d12::RootSignature::null(), total_root_elements: 0, special_constants_root_index: None, + special_constants_cmd_signatures: None, root_constant_info: None, }, root_elements: [RootElement::Empty; MAX_ROOT_ELEMENTS], @@ -555,6 +557,7 @@ struct PipelineLayoutShared { signature: d3d12::RootSignature, total_root_elements: RootIndex, special_constants_root_index: Option, + special_constants_cmd_signatures: Option, root_constant_info: Option, }