diff --git a/deno_webgpu/pipeline.rs b/deno_webgpu/pipeline.rs index f9257051191..86d530332f7 100644 --- a/deno_webgpu/pipeline.rs +++ b/deno_webgpu/pipeline.rs @@ -112,7 +112,6 @@ pub fn op_webgpu_create_compute_pipeline( entry_point: compute.entry_point.map(Cow::from), constants: Cow::Owned(compute.constants.unwrap_or_default()), zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, cache: None, }; @@ -348,7 +347,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(fragment.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, targets: Cow::Owned(fragment.targets), }) @@ -374,7 +372,6 @@ pub fn op_webgpu_create_render_pipeline( constants: Cow::Owned(args.vertex.constants.unwrap_or_default()), // Required to be true for WebGPU zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, buffers: Cow::Owned(vertex_buffers), }, diff --git a/naga/CHANGELOG.md b/naga/CHANGELOG.md index 2a00f01f86d..49cde4e2123 100644 --- a/naga/CHANGELOG.md +++ b/naga/CHANGELOG.md @@ -81,6 +81,7 @@ For changelogs after v0.14, see [the wgpu changelog](../CHANGELOG.md). - Make varyings' struct members unique. ([#2521](https://github.com/gfx-rs/naga/pull/2521)) **@evahop** - Add experimental vertex pulling transform flag. ([#5254](https://github.com/gfx-rs/wgpu/pull/5254)) **@bradwerth** - Fixup some generated MSL for vertex buffer unpack functions. ([#5829](https://github.com/gfx-rs/wgpu/pull/5829)) **@bradwerth** +- Make vertex pulling transform on by default. ([#5773](https://github.com/gfx-rs/wgpu/pull/5773)) **@bradwerth** #### GLSL-OUT diff --git a/naga/src/back/msl/mod.rs b/naga/src/back/msl/mod.rs index 3b33ee7a716..626475debcd 100644 --- a/naga/src/back/msl/mod.rs +++ b/naga/src/back/msl/mod.rs @@ -354,7 +354,9 @@ pub struct PipelineOptions { /// to receive the vertex buffers, lengths, and vertex id as args, /// and bounds-check the vertex id and use the index into the /// vertex buffers to access attributes, rather than using Metal's - /// [[stage-in]] assembled attribute data. + /// [[stage-in]] assembled attribute data. This is true by default, + /// but remains configurable for use by tests via deserialization + /// of this struct. There is no user-facing way to set this value. pub vertex_pulling_transform: bool, /// vertex_buffer_mappings are used during shader translation to diff --git a/naga/src/back/msl/writer.rs b/naga/src/back/msl/writer.rs index 8b868970078..7ec22009bd4 100644 --- a/naga/src/back/msl/writer.rs +++ b/naga/src/back/msl/writer.rs @@ -3953,8 +3953,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2((float(b0) - 128.0f) / 255.0f, \ - (float(b1) - 128.0f) / 255.0f);", + "{}return metal::float2(metal::max(-1.0f, as_type(b0) / 127.0f), \ + metal::max(-1.0f, as_type(b1) / 127.0f));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -3971,10 +3971,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float4((float(b0) - 128.0f) / 255.0f, \ - (float(b1) - 128.0f) / 255.0f, \ - (float(b2) - 128.0f) / 255.0f, \ - (float(b3) - 128.0f) / 255.0f);", + "{}return metal::float4(metal::max(-1.0f, as_type(b0) / 127.0f), \ + metal::max(-1.0f, as_type(b1) / 127.0f), \ + metal::max(-1.0f, as_type(b2) / 127.0f), \ + metal::max(-1.0f, as_type(b3) / 127.0f));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4033,8 +4033,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int2(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2));", + "{}return metal::int2(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4055,10 +4055,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int4(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2), \ - as_type(b5 << 8 | b4), \ - as_type(b7 << 8 | b6));", + "{}return metal::int4(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)), \ + as_type(metal::ushort(b5 << 8 | b4)), \ + as_type(metal::ushort(b7 << 8 | b6)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4117,8 +4117,7 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \ - (float(b3 << 8 | b2) - 32767.0f) / 65535.0f);", + "{}return metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2);", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4139,10 +4138,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float4((float(b1 << 8 | b0) - 32767.0f) / 65535.0f, \ - (float(b3 << 8 | b2) - 32767.0f) / 65535.0f, \ - (float(b5 << 8 | b4) - 32767.0f) / 65535.0f, \ - (float(b7 << 8 | b6) - 32767.0f) / 65535.0f);", + "{}return metal::float4(metal::unpack_snorm2x16_to_float(b1 << 24 | b0 << 16 | b3 << 8 | b2), \ + metal::unpack_snorm2x16_to_float(b5 << 24 | b4 << 16 | b7 << 8 | b6));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4159,8 +4156,8 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::float2(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2));", + "{}return metal::float2(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4170,7 +4167,7 @@ impl Writer { let name = self.namer.call("unpackFloat16x4"); writeln!( self.out, - "metal::int4 {name}(metal::ushort b0, \ + "metal::float4 {name}(metal::ushort b0, \ metal::ushort b1, \ metal::ushort b2, \ metal::ushort b3, \ @@ -4181,10 +4178,10 @@ impl Writer { )?; writeln!( self.out, - "{}return metal::int4(as_type(b1 << 8 | b0), \ - as_type(b3 << 8 | b2), \ - as_type(b5 << 8 | b4), \ - as_type(b7 << 8 | b6));", + "{}return metal::float4(as_type(metal::ushort(b1 << 8 | b0)), \ + as_type(metal::ushort(b3 << 8 | b2)), \ + as_type(metal::ushort(b5 << 8 | b4)), \ + as_type(metal::ushort(b7 << 8 | b6)));", back::INDENT )?; writeln!(self.out, "}}")?; @@ -4390,10 +4387,10 @@ impl Writer { let name = self.namer.call("unpackSint32"); writeln!( self.out, - "metal::int {name}(uint b0, \ - uint b1, \ - uint b2, \ - uint b3) {{" + "int {name}(uint b0, \ + uint b1, \ + uint b2, \ + uint b3) {{" )?; writeln!( self.out, @@ -4495,7 +4492,18 @@ impl Writer { )?; writeln!( self.out, - "{}return unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);", + // The following is correct for RGBA packing, but our format seems to + // match ABGR, which can be fed into the Metal builtin function + // unpack_unorm10a2_to_float. + /* + "{}uint v = (b3 << 24 | b2 << 16 | b1 << 8 | b0); \ + uint r = (v & 0xFFC00000) >> 22; \ + uint g = (v & 0x003FF000) >> 12; \ + uint b = (v & 0x00000FFC) >> 2; \ + uint a = (v & 0x00000003); \ + return metal::float4(float(r) / 1023.0f, float(g) / 1023.0f, float(b) / 1023.0f, float(a) / 3.0f);", + */ + "{}return metal::unpack_unorm10a2_to_float(b3 << 24 | b2 << 16 | b1 << 8 | b0);", back::INDENT )?; writeln!(self.out, "}}")?; diff --git a/tests/tests/root.rs b/tests/tests/root.rs index 6ceb3818df8..384cfcf78fc 100644 --- a/tests/tests/root.rs +++ b/tests/tests/root.rs @@ -42,6 +42,7 @@ mod subgroup_operations; mod texture_bounds; mod texture_view_creation; mod transfer; +mod vertex_formats; mod vertex_indices; mod write_texture; mod zero_init_texture_after_discard; diff --git a/tests/tests/vertex_formats/draw.vert.wgsl b/tests/tests/vertex_formats/draw.vert.wgsl new file mode 100644 index 00000000000..bf6a08aac6c --- /dev/null +++ b/tests/tests/vertex_formats/draw.vert.wgsl @@ -0,0 +1,316 @@ +@group(0) @binding(0) +var checksums: array; + +const index_uint = 0u; +const index_sint = 1u; +const index_unorm = 2u; +const index_snorm = 3u; +const index_float16 = 4u; +const index_float32 = 5u; + +fn init_checksums() { + checksums[index_uint] = 0.0; + checksums[index_sint] = 0.0; + checksums[index_unorm] = 0.0; + checksums[index_snorm] = 0.0; + checksums[index_float16] = 0.0; + checksums[index_float32] = 0.0; +} + +// Break down the 31 vertex formats specified at +// https://gpuweb.github.io/gpuweb/#vertex-formats into blocks +// of 8, to keep under the limits of max locations. Each +// AttributeBlockX structure will get a corresponding +// vertex_block_X function to process its attributes into +// values written to the checksums buffer. + +struct AttributeBlock0 { + // 4-byte-aligned unorm formats + @location(0) unorm8x4: vec4, + @location(1) unorm16x2: vec2, + @location(2) unorm16x4: vec4, + + // 4-byte-aligned snorm formats + @location(3) snorm8x4: vec4, + @location(4) snorm16x2: vec2, + @location(5) snorm16x4: vec4, + + // 2-byte-aligned formats + @location(6) unorm8x2: vec2, + @location(7) snorm8x2: vec2, +} + +@vertex +fn vertex_block_0(v_in: AttributeBlock0) -> @builtin(position) vec4 +{ + init_checksums(); + + // Accumulate all unorm into one checksum value. + var all_unorm: f32 = 0.0; + all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.x); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.y); + + all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.x); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.y); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.z); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.w); + + all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.x); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.y); + + all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.x); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.y); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.z); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.w); + + checksums[index_unorm] = f32(all_unorm); + + // Accumulate all snorm into one checksum value. + var all_snorm: f32 = 0.0; + all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.x); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.y); + + all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.x); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.y); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.z); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.w); + + all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.x); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.y); + + all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.x); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.y); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.z); + all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.w); + + checksums[index_snorm] = f32(all_snorm); + + return vec4(0.0); +} + +struct AttributeBlock1 { + // 4-byte-aligned uint formats + @location(0) uint8x4: vec4, + @location(1) uint16x2: vec2, + @location(2) uint16x4: vec4, + + // 4-byte-aligned sint formats + @location(3) sint8x4: vec4, + @location(4) sint16x2: vec2, + @location(5) sint16x4: vec4, + + // 2-byte-aligned formats + @location(6) uint8x2: vec2, + @location(7) sint8x2: vec2, +} + +@vertex +fn vertex_block_1(v_in: AttributeBlock1) -> @builtin(position) vec4 +{ + init_checksums(); + + // Accumulate all uint into one checksum value. + var all_uint: u32 = 0; + all_uint = accumulate_uint(all_uint, v_in.uint8x2.x); + all_uint = accumulate_uint(all_uint, v_in.uint8x2.y); + + all_uint = accumulate_uint(all_uint, v_in.uint8x4.x); + all_uint = accumulate_uint(all_uint, v_in.uint8x4.y); + all_uint = accumulate_uint(all_uint, v_in.uint8x4.z); + all_uint = accumulate_uint(all_uint, v_in.uint8x4.w); + + all_uint = accumulate_uint(all_uint, v_in.uint16x2.x); + all_uint = accumulate_uint(all_uint, v_in.uint16x2.y); + + all_uint = accumulate_uint(all_uint, v_in.uint16x4.x); + all_uint = accumulate_uint(all_uint, v_in.uint16x4.y); + all_uint = accumulate_uint(all_uint, v_in.uint16x4.z); + all_uint = accumulate_uint(all_uint, v_in.uint16x4.w); + + checksums[index_uint] = f32(all_uint); + + // Accumulate all sint into one checksum value. + var all_sint: i32 = 0; + all_sint = accumulate_sint(all_sint, v_in.sint8x2.x); + all_sint = accumulate_sint(all_sint, v_in.sint8x2.y); + + all_sint = accumulate_sint(all_sint, v_in.sint8x4.x); + all_sint = accumulate_sint(all_sint, v_in.sint8x4.y); + all_sint = accumulate_sint(all_sint, v_in.sint8x4.z); + all_sint = accumulate_sint(all_sint, v_in.sint8x4.w); + + all_sint = accumulate_sint(all_sint, v_in.sint16x2.x); + all_sint = accumulate_sint(all_sint, v_in.sint16x2.y); + + all_sint = accumulate_sint(all_sint, v_in.sint16x4.x); + all_sint = accumulate_sint(all_sint, v_in.sint16x4.y); + all_sint = accumulate_sint(all_sint, v_in.sint16x4.z); + all_sint = accumulate_sint(all_sint, v_in.sint16x4.w); + + checksums[index_sint] = f32(all_sint); + + return vec4(0.0); +} + +struct AttributeBlock2 { + @location(0) uint32: u32, + @location(1) uint32x2: vec2, + @location(2) uint32x3: vec3, + @location(3) uint32x4: vec4, +} + +@vertex +fn vertex_block_2(v_in: AttributeBlock2) -> @builtin(position) vec4 +{ + init_checksums(); + + // Accumulate all uint into one checksum value. + var all_uint: u32 = 0; + all_uint = accumulate_uint(all_uint, v_in.uint32); + + all_uint = accumulate_uint(all_uint, v_in.uint32x2.x); + all_uint = accumulate_uint(all_uint, v_in.uint32x2.y); + + all_uint = accumulate_uint(all_uint, v_in.uint32x3.x); + all_uint = accumulate_uint(all_uint, v_in.uint32x3.y); + all_uint = accumulate_uint(all_uint, v_in.uint32x3.z); + + all_uint = accumulate_uint(all_uint, v_in.uint32x4.x); + all_uint = accumulate_uint(all_uint, v_in.uint32x4.y); + all_uint = accumulate_uint(all_uint, v_in.uint32x4.z); + all_uint = accumulate_uint(all_uint, v_in.uint32x4.w); + + checksums[index_uint] = f32(all_uint); + + return vec4(0.0); +} + +struct AttributeBlock3 { + @location(0) sint32: i32, + @location(1) sint32x2: vec2, + @location(2) sint32x3: vec3, + @location(3) sint32x4: vec4, +} + +@vertex +fn vertex_block_3(v_in: AttributeBlock3) -> @builtin(position) vec4 +{ + init_checksums(); + + // Accumulate all sint into one checksum value. + var all_sint: i32 = 0; + all_sint = accumulate_sint(all_sint, v_in.sint32); + + all_sint = accumulate_sint(all_sint, v_in.sint32x2.x); + all_sint = accumulate_sint(all_sint, v_in.sint32x2.y); + + all_sint = accumulate_sint(all_sint, v_in.sint32x3.x); + all_sint = accumulate_sint(all_sint, v_in.sint32x3.y); + all_sint = accumulate_sint(all_sint, v_in.sint32x3.z); + + all_sint = accumulate_sint(all_sint, v_in.sint32x4.x); + all_sint = accumulate_sint(all_sint, v_in.sint32x4.y); + all_sint = accumulate_sint(all_sint, v_in.sint32x4.z); + all_sint = accumulate_sint(all_sint, v_in.sint32x4.w); + + checksums[index_sint] = f32(all_sint); + + return vec4(0.0); +} + +struct AttributeBlock4{ + @location(0) float32: f32, + @location(1) float32x2: vec2, + @location(2) float32x3: vec3, + @location(3) float32x4: vec4, + @location(4) float16x2: vec2, + @location(5) float16x4: vec4, +} + +@vertex +fn vertex_block_4(v_in: AttributeBlock4) -> @builtin(position) vec4 +{ + init_checksums(); + + // Accumulate all float32 into one checksum value. + var all_float32: f32 = 0.0; + all_float32 = accumulate_float32(all_float32, v_in.float32); + + all_float32 = accumulate_float32(all_float32, v_in.float32x2.x); + all_float32 = accumulate_float32(all_float32, v_in.float32x2.y); + + all_float32 = accumulate_float32(all_float32, v_in.float32x3.x); + all_float32 = accumulate_float32(all_float32, v_in.float32x3.y); + all_float32 = accumulate_float32(all_float32, v_in.float32x3.z); + + all_float32 = accumulate_float32(all_float32, v_in.float32x4.x); + all_float32 = accumulate_float32(all_float32, v_in.float32x4.y); + all_float32 = accumulate_float32(all_float32, v_in.float32x4.z); + all_float32 = accumulate_float32(all_float32, v_in.float32x4.w); + + checksums[index_float32] = f32(all_float32); + + // Accumulate all float16 into one checksum value. + var all_float16: f32 = 0.0; + all_float16 = accumulate_float16(all_float16, v_in.float16x2.x); + all_float16 = accumulate_float16(all_float16, v_in.float16x2.y); + + all_float16 = accumulate_float16(all_float16, v_in.float16x4.x); + all_float16 = accumulate_float16(all_float16, v_in.float16x4.y); + all_float16 = accumulate_float16(all_float16, v_in.float16x4.z); + all_float16 = accumulate_float16(all_float16, v_in.float16x4.w); + + checksums[index_float16] = f32(all_float16); + + return vec4(0.0); +} + +struct AttributeBlock5{ + @location(0) unorm10_10_10_2: vec4, +} + +@vertex +fn vertex_block_5(v_in: AttributeBlock5) -> @builtin(position) vec4 +{ + init_checksums(); + + // Accumulate all unorm into one checksum value. + var all_unorm: f32 = 0.0; + all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.x); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.y); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.z); + all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.w); + + checksums[index_unorm] = f32(all_unorm); + + return vec4(0.0); +} + +fn accumulate_uint(accum: u32, val: u32) -> u32 { + return accum + val; +} + +fn accumulate_sint(accum: i32, val: i32) -> i32 { + return accum + val; +} + +fn accumulate_unorm(accum: f32, val: f32) -> f32 { + return accum + val; +} + +fn accumulate_snorm(accum: f32, val: f32) -> f32 { + return accum + val; +} + +fn accumulate_float16(accum: f32, val: f32) -> f32 { + return accum + val; +} + +fn accumulate_float32(accum: f32, val: f32) -> f32 { + return accum + val; +} + +@fragment +fn fragment_main() -> @location(0) vec4 { + return vec4(0.0); +} diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs new file mode 100644 index 00000000000..8bd78d7c7b3 --- /dev/null +++ b/tests/tests/vertex_formats/mod.rs @@ -0,0 +1,386 @@ +//! Tests that vertex formats pass through to vertex shaders accurately. + +use std::num::NonZeroU64; + +use wgpu::util::{BufferInitDescriptor, DeviceExt}; + +use wgpu_test::{gpu_test, FailureCase, GpuTestConfiguration, TestParameters, TestingContext}; + +#[derive(Debug, Copy, Clone)] +enum TestCase { + UnormsAndSnorms, + UintsAndSintsSmall, + UintsBig, + SintsBig, + Floats, + Unorm1010102, +} + +struct Test<'a> { + case: TestCase, + entry_point: &'a str, + attributes: &'a [wgt::VertexAttribute], + input: &'a [u8], + checksums: &'a [f32], +} + +async fn vertex_formats_all(ctx: TestingContext) { + let attributes_block_0 = &wgpu::vertex_attr_array![ + 0 => Unorm8x4, + 1 => Unorm16x2, + 2 => Unorm16x4, + 3 => Snorm8x4, + 4 => Snorm16x2, + 5 => Snorm16x4, + 6 => Unorm8x2, + 7 => Snorm8x2, + ]; + + let attributes_block_1 = &wgpu::vertex_attr_array![ + 0 => Uint8x4, + 1 => Uint16x2, + 2 => Uint16x4, + 3 => Sint8x4, + 4 => Sint16x2, + 5 => Sint16x4, + 6 => Uint8x2, + 7 => Sint8x2, + ]; + + let attributes_block_2 = &wgpu::vertex_attr_array![ + 0 => Uint32, + 1 => Uint32x2, + 2 => Uint32x3, + 3 => Uint32x4, + ]; + + let attributes_block_3 = &wgpu::vertex_attr_array![ + 0 => Sint32, + 1 => Sint32x2, + 2 => Sint32x3, + 3 => Sint32x4, + ]; + + let attributes_block_4 = &wgpu::vertex_attr_array![ + 0 => Float32, + 1 => Float32x2, + 2 => Float32x3, + 3 => Float32x4, + 4 => Float16x2, + 5 => Float16x4, + ]; + + let tests = vec![ + Test { + case: TestCase::UnormsAndSnorms, + entry_point: "vertex_block_0", + attributes: attributes_block_0, + input: &[ + 128u8, 128u8, 128u8, 128u8, // Unorm8x4 (0.5, 0.5, 0.5, 0.5) + 0u8, 128u8, 0u8, 128u8, // Unorm16x2 (0.5, 0.5) + 0u8, 64u8, 0u8, 64u8, 0u8, 64u8, 0u8, + 64u8, // Unorm16x4 (0.25, 0.25, 0.25, 0.25) + 127u8, 127u8, 127u8, 127u8, // Snorm8x4 (1, 1, 1, 1) + 0u8, 128u8, 0u8, 128u8, // Snorm16x2 (-1, -1) + 0u8, 127u8, 0u8, 127u8, 0u8, 127u8, 0u8, 127u8, // Snorm16x4 (1, 1, 1, 1) + 255u8, 255u8, // Unorm8x2 (1, 1) + 128u8, 128u8, // Snorm8x2 (-1, -1) + ], + checksums: &[0.0, 0.0, 6.0, 4.0, 0.0, 0.0], + }, + Test { + case: TestCase::UintsAndSintsSmall, + entry_point: "vertex_block_1", + attributes: attributes_block_1, + input: &[ + 4u8, 8u8, 16u8, 32u8, // Uint8x4 (4, 8, 16, 32) + 64u8, 0u8, 128u8, 0u8, // Uint16x2 (64, 128) + 0u8, 1u8, 0u8, 2u8, 0u8, 4u8, 0u8, 8u8, // Uint16x4 (256, 512, 1024, 2048) + 127u8, 127u8, 2u8, 0u8, // Sint8x4 (127, 127, 2, 0) + 255u8, 255u8, 1u8, 0u8, // Sint16x2 (-1, 1) + 128u8, 255u8, 128u8, 255u8, 0u8, 1u8, 240u8, + 255u8, // Sint16x4 (-128, -128, 256, -16) + 1u8, 2u8, // Uint8x2 (1, 2) + 128u8, 128u8, // Sint8x2 (-128, -128) + ], + checksums: &[4095.0, -16.0, 0.0, 0.0, 0.0, 0.0], + }, + Test { + case: TestCase::UintsBig, + entry_point: "vertex_block_2", + attributes: attributes_block_2, + input: &[ + 1u8, 0u8, 0u8, 0u8, // Uint32x2 (1) + 2u8, 0u8, 0u8, 0u8, 4u8, 0u8, 0u8, 0u8, // Uint32x2 (2, 4) + 8u8, 0u8, 0u8, 0u8, 16u8, 0u8, 0u8, 0u8, 32u8, 0u8, 0u8, + 0u8, // Uint32x3 (8, 16, 32) + 64u8, 0u8, 0u8, 0u8, 128u8, 0u8, 0u8, 0u8, 0u8, 1u8, 0u8, 0u8, 0u8, 2u8, 0u8, + 0u8, // Uint32x4 (64, 128, 256, 512) + ], + checksums: &[1023.0, 0.0, 0.0, 0.0, 0.0, 0.0], + }, + Test { + case: TestCase::SintsBig, + entry_point: "vertex_block_3", + attributes: attributes_block_3, + input: &[ + 128u8, 255u8, 255u8, 255u8, // Sint32 (-128) + 120u8, 0u8, 0u8, 0u8, 8u8, 0u8, 0u8, 0u8, // Sint32x2 (120, 8) + 252u8, 255u8, 255u8, 255u8, 2u8, 0u8, 0u8, 0u8, 2u8, 0u8, 0u8, + 0u8, // Sint32x3 (-4, 2, 2) + 24u8, 252u8, 255u8, 255u8, 88u8, 2u8, 0u8, 0u8, 44u8, 1u8, 0u8, 0u8, 99u8, 0u8, + 0u8, 0u8, // Sint32x4 (-1000, 600, 300, 99) + ], + checksums: &[0.0, -1.0, 0.0, 0.0, 0.0, 0.0], + }, + Test { + case: TestCase::Floats, + entry_point: "vertex_block_4", + attributes: attributes_block_4, + input: &[ + 0u8, 0u8, 0u8, 63u8, // Float32 (0.5) + 0u8, 0u8, 0u8, 191u8, 0u8, 0u8, 128u8, 64u8, // Float32x2 (-0.5, 4.0) + 0u8, 0u8, 0u8, 192u8, 0u8, 0u8, 204u8, 194u8, 0u8, 0u8, 200u8, + 66u8, // Float32x3 (-2.0, -102.0, 100.0) + 0u8, 0u8, 92u8, 66u8, 0u8, 0u8, 72u8, 194u8, 0u8, 0u8, 32u8, 65u8, 0u8, 0u8, 128u8, + 63u8, // Float32x4 (55.0, -50.0, 10.0, 1.0) + 0u8, 60u8, 72u8, 53u8, // Float16x2 (1.0, 0.33) + 72u8, 57u8, 0u8, 192u8, 0u8, 188u8, 0u8, 184u8, // Float16x4 (0.66, -2.0, -1.0, -0.5) + ], + checksums: &[0.0, 0.0, 0.0, 0.0, -1.5, 16.0], + }, + ]; + + vertex_formats_common(ctx, &tests).await; +} + +async fn vertex_formats_10_10_10_2(ctx: TestingContext) { + let attributes_block_5 = &wgpu::vertex_attr_array![ + 0 => Unorm10_10_10_2, + ]; + + let tests = vec![Test { + case: TestCase::Unorm1010102, + entry_point: "vertex_block_5", + attributes: attributes_block_5, + input: &[ + // We are aiming for rgba of (0.5, 0.5, 0.5, 0.66) + // Packing AA BB BBBB BBBB GGGG GGGG GG RR RRRR RRRR + // Binary 10 10 0000 0000 1000 0000 00 10 0000 0000 + // Hex A0 08 02 00 + // Decimal 160 8 2 0 + // unorm 0.66 0.5 0.5 0.5 = 2.16 + 0u8, 2u8, 8u8, 160u8, // Unorm10_10_10_2 + ], + checksums: &[0.0, 0.0, 2.16, 0.0, 0.0, 0.0], + }]; + + vertex_formats_common(ctx, &tests).await; +} + +async fn vertex_formats_common(ctx: TestingContext, tests: &[Test<'_>]) { + let shader = ctx + .device + .create_shader_module(wgpu::include_wgsl!("draw.vert.wgsl")); + + let bgl = ctx + .device + .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor { + label: None, + entries: &[wgpu::BindGroupLayoutEntry { + binding: 0, + ty: wgpu::BindingType::Buffer { + ty: wgpu::BufferBindingType::Storage { read_only: false }, + has_dynamic_offset: false, + min_binding_size: NonZeroU64::new(4), + }, + visibility: wgpu::ShaderStages::VERTEX, + count: None, + }], + }); + + let ppl = ctx + .device + .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor { + label: None, + bind_group_layouts: &[&bgl], + push_constant_ranges: &[], + }); + + let dummy = ctx + .device + .create_texture_with_data( + &ctx.queue, + &wgpu::TextureDescriptor { + label: Some("dummy"), + size: wgpu::Extent3d { + width: 1, + height: 1, + depth_or_array_layers: 1, + }, + mip_level_count: 1, + sample_count: 1, + dimension: wgpu::TextureDimension::D2, + format: wgpu::TextureFormat::Rgba8Unorm, + usage: wgpu::TextureUsages::RENDER_ATTACHMENT | wgpu::TextureUsages::COPY_DST, + view_formats: &[], + }, + wgpu::util::TextureDataOrder::LayerMajor, + &[0, 0, 0, 1], + ) + .create_view(&wgpu::TextureViewDescriptor::default()); + + let mut failed = false; + for test in tests { + let buffer_input = ctx.device.create_buffer_init(&BufferInitDescriptor { + label: None, + contents: bytemuck::cast_slice(test.input), + usage: wgpu::BufferUsages::VERTEX, + }); + + let pipeline_desc = wgpu::RenderPipelineDescriptor { + label: None, + layout: Some(&ppl), + vertex: wgpu::VertexState { + buffers: &[wgpu::VertexBufferLayout { + array_stride: 0, // Calculate, please! + step_mode: wgpu::VertexStepMode::Vertex, + attributes: test.attributes, + }], + module: &shader, + entry_point: test.entry_point, + compilation_options: Default::default(), + }, + primitive: wgpu::PrimitiveState::default(), + depth_stencil: None, + multisample: wgpu::MultisampleState::default(), + fragment: Some(wgpu::FragmentState { + module: &shader, + entry_point: "fragment_main", + compilation_options: Default::default(), + targets: &[Some(wgpu::ColorTargetState { + format: wgpu::TextureFormat::Rgba8Unorm, + blend: None, + write_mask: wgpu::ColorWrites::ALL, + })], + }), + multiview: None, + cache: None, + }; + + let pipeline = ctx.device.create_render_pipeline(&pipeline_desc); + + let expected = test.checksums; + let buffer_size = (std::mem::size_of_val(&expected[0]) * expected.len()) as u64; + let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: buffer_size, + usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ, + mapped_at_creation: false, + }); + + let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor { + label: None, + size: buffer_size, + usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::STORAGE, + mapped_at_creation: false, + }); + + let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor { + label: None, + layout: &bgl, + entries: &[wgpu::BindGroupEntry { + binding: 0, + resource: gpu_buffer.as_entire_binding(), + }], + }); + + let mut encoder1 = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + let mut rpass = encoder1.begin_render_pass(&wgpu::RenderPassDescriptor { + label: None, + color_attachments: &[Some(wgpu::RenderPassColorAttachment { + ops: wgpu::Operations::default(), + resolve_target: None, + view: &dummy, + })], + depth_stencil_attachment: None, + timestamp_writes: None, + occlusion_query_set: None, + }); + + rpass.set_vertex_buffer(0, buffer_input.slice(..)); + rpass.set_pipeline(&pipeline); + rpass.set_bind_group(0, &bg, &[]); + + // Draw three vertices and no instance, which is enough to generate the + // checksums. + rpass.draw(0..3, 0..1); + + drop(rpass); + + let mut encoder2 = ctx + .device + .create_command_encoder(&wgpu::CommandEncoderDescriptor::default()); + + encoder2.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size); + + // See https://github.com/gfx-rs/wgpu/issues/4732 for why this is split between two submissions + // with a hard wait in between. + ctx.queue.submit([encoder1.finish()]); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + ctx.queue.submit([encoder2.finish()]); + let slice = cpu_buffer.slice(..); + slice.map_async(wgpu::MapMode::Read, |_| ()); + ctx.async_poll(wgpu::Maintain::wait()) + .await + .panic_on_timeout(); + let data: Vec = bytemuck::cast_slice(&slice.get_mapped_range()).to_vec(); + + let case_name = format!("Case {:?}", test.case); + + // Calculate the difference between data and expected. Since the data is + // a bunch of float checksums, we allow a fairly large epsilon, which helps + // with the accumulation of float rounding errors. + const EPSILON: f32 = 0.05; + + let mut deltas = data.iter().zip(expected.iter()).map(|(d, e)| (d - e).abs()); + if deltas.any(|x| x > EPSILON) { + eprintln!( + "Failed: Got: {:?} Expected: {:?} - {case_name}", + data, expected, + ); + failed = true; + continue; + } + + eprintln!("Passed: {case_name}"); + } + + assert!(!failed); +} + +#[gpu_test] +static VERTEX_FORMATS_ALL: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .test_features_limits() + .features(wgpu::Features::VERTEX_WRITABLE_STORAGE), + ) + .run_async(vertex_formats_all); + +// Some backends can handle Unorm-10-10-2, but GL backends seem to throw this error: +// Validation Error: GL_INVALID_ENUM in glVertexAttribFormat(type = GL_UNSIGNED_INT_10_10_10_2) +#[gpu_test] +static VERTEX_FORMATS_10_10_10_2: GpuTestConfiguration = GpuTestConfiguration::new() + .parameters( + TestParameters::default() + .expect_fail(FailureCase::backend(wgpu::Backends::GL)) + .test_features_limits() + .features(wgpu::Features::VERTEX_WRITABLE_STORAGE), + ) + .run_async(vertex_formats_10_10_10_2); diff --git a/tests/tests/vertex_indices/mod.rs b/tests/tests/vertex_indices/mod.rs index 59048ef31c1..dcc2ca82f54 100644 --- a/tests/tests/vertex_indices/mod.rs +++ b/tests/tests/vertex_indices/mod.rs @@ -166,7 +166,6 @@ struct Test { id_source: IdSource, draw_call_kind: DrawCallKind, encoder_kind: EncoderKind, - vertex_pulling_transform: bool, } impl Test { @@ -280,15 +279,6 @@ async fn vertex_index_common(ctx: TestingContext) { cache: None, }; let builtin_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = true; - let builtin_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = false; pipeline_desc.vertex.entry_point = "vs_main_buffers"; pipeline_desc.vertex.buffers = &[ @@ -304,15 +294,6 @@ async fn vertex_index_common(ctx: TestingContext) { }, ]; let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = true; - let buffer_pipeline_vpt = ctx.device.create_render_pipeline(&pipeline_desc); - pipeline_desc - .vertex - .compilation_options - .vertex_pulling_transform = false; let dummy = ctx .device @@ -341,18 +322,12 @@ async fn vertex_index_common(ctx: TestingContext) { .cartesian_product(IdSource::iter()) .cartesian_product(DrawCallKind::iter()) .cartesian_product(EncoderKind::iter()) - .cartesian_product([false, true]) - .map( - |((((case, id_source), draw_call_kind), encoder_kind), vertex_pulling_transform)| { - Test { - case, - id_source, - draw_call_kind, - encoder_kind, - vertex_pulling_transform, - } - }, - ) + .map(|(((case, id_source), draw_call_kind), encoder_kind)| Test { + case, + id_source, + draw_call_kind, + encoder_kind, + }) .collect::>(); let features = ctx.adapter.features(); @@ -360,20 +335,8 @@ async fn vertex_index_common(ctx: TestingContext) { let mut failed = false; for test in tests { let pipeline = match test.id_source { - IdSource::Buffers => { - if test.vertex_pulling_transform { - &buffer_pipeline_vpt - } else { - &buffer_pipeline - } - } - IdSource::Builtins => { - if test.vertex_pulling_transform { - &builtin_pipeline_vpt - } else { - &builtin_pipeline - } - } + IdSource::Buffers => &buffer_pipeline, + IdSource::Builtins => &builtin_pipeline, }; let expected = test.expectation(&ctx); diff --git a/wgpu-core/src/device/global.rs b/wgpu-core/src/device/global.rs index 5ebd7c7de78..413a6edb453 100644 --- a/wgpu-core/src/device/global.rs +++ b/wgpu-core/src/device/global.rs @@ -1484,7 +1484,6 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, - vertex_pulling_transform: desc.vertex.stage.vertex_pulling_transform, }; ResolvedVertexState { stage, @@ -1511,7 +1510,6 @@ impl Global { .vertex .stage .zero_initialize_workgroup_memory, - vertex_pulling_transform: state.stage.vertex_pulling_transform, }; Some(ResolvedFragmentState { stage, @@ -1720,7 +1718,6 @@ impl Global { entry_point: desc.stage.entry_point.clone(), constants: desc.stage.constants.clone(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, - vertex_pulling_transform: desc.stage.vertex_pulling_transform, }; let desc = ResolvedComputePipelineDescriptor { diff --git a/wgpu-core/src/device/resource.rs b/wgpu-core/src/device/resource.rs index ee943d7fdc7..9cf8b21c388 100644 --- a/wgpu-core/src/device/resource.rs +++ b/wgpu-core/src/device/resource.rs @@ -2694,7 +2694,6 @@ impl Device { entry_point: final_entry_point_name.as_ref(), constants: desc.stage.constants.as_ref(), zero_initialize_workgroup_memory: desc.stage.zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, cache: cache.as_ref().and_then(|it| it.raw.as_ref()), }; @@ -3114,7 +3113,6 @@ impl Device { entry_point: &vertex_entry_point_name, constants: stage_desc.constants.as_ref(), zero_initialize_workgroup_memory: stage_desc.zero_initialize_workgroup_memory, - vertex_pulling_transform: stage_desc.vertex_pulling_transform, } }; @@ -3171,7 +3169,6 @@ impl Device { zero_initialize_workgroup_memory: fragment_state .stage .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }) } None => None, diff --git a/wgpu-core/src/pipeline.rs b/wgpu-core/src/pipeline.rs index 6366279eff1..2ab49f83d0c 100644 --- a/wgpu-core/src/pipeline.rs +++ b/wgpu-core/src/pipeline.rs @@ -147,8 +147,6 @@ pub struct ProgrammableStageDescriptor<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } /// Describes a programmable pipeline stage. @@ -176,8 +174,6 @@ pub struct ResolvedProgrammableStageDescriptor<'a, A: HalApi> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } /// Number of implicit bind groups derived at pipeline creation. diff --git a/wgpu-hal/examples/halmark/main.rs b/wgpu-hal/examples/halmark/main.rs index daed0c1d35f..30ff45ff5bd 100644 --- a/wgpu-hal/examples/halmark/main.rs +++ b/wgpu-hal/examples/halmark/main.rs @@ -257,7 +257,6 @@ impl Example { entry_point: "vs_main", constants: &constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, vertex_buffers: &[], fragment_stage: Some(hal::ProgrammableStage { @@ -265,7 +264,6 @@ impl Example { entry_point: "fs_main", constants: &constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }), primitive: wgt::PrimitiveState { topology: wgt::PrimitiveTopology::TriangleStrip, diff --git a/wgpu-hal/examples/ray-traced-triangle/main.rs b/wgpu-hal/examples/ray-traced-triangle/main.rs index 1cde9fa2516..7cd6547f2c4 100644 --- a/wgpu-hal/examples/ray-traced-triangle/main.rs +++ b/wgpu-hal/examples/ray-traced-triangle/main.rs @@ -379,7 +379,6 @@ impl Example { entry_point: "main", constants: &Default::default(), zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, }, cache: None, }) diff --git a/wgpu-hal/src/lib.rs b/wgpu-hal/src/lib.rs index b28a005a7ab..9df479b13c1 100644 --- a/wgpu-hal/src/lib.rs +++ b/wgpu-hal/src/lib.rs @@ -1866,8 +1866,6 @@ pub struct ProgrammableStage<'a, A: Api> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } // Rust gets confused about the impl requirements for `A` @@ -1878,7 +1876,6 @@ impl Clone for ProgrammableStage<'_, A> { entry_point: self.entry_point, constants: self.constants, zero_initialize_workgroup_memory: self.zero_initialize_workgroup_memory, - vertex_pulling_transform: self.vertex_pulling_transform, } } } diff --git a/wgpu-hal/src/metal/device.rs b/wgpu-hal/src/metal/device.rs index efafc98e1b3..d9525999d8b 100644 --- a/wgpu-hal/src/metal/device.rs +++ b/wgpu-hal/src/metal/device.rs @@ -158,7 +158,7 @@ impl super::Device { metal::MTLPrimitiveTopologyClass::Point => true, _ => false, }, - vertex_pulling_transform: stage.vertex_pulling_transform, + vertex_pulling_transform: true, vertex_buffer_mappings: vertex_buffer_mappings.to_vec(), }; diff --git a/wgpu/src/backend/wgpu_core.rs b/wgpu/src/backend/wgpu_core.rs index 91629d638c4..fae25422829 100644 --- a/wgpu/src/backend/wgpu_core.rs +++ b/wgpu/src/backend/wgpu_core.rs @@ -1180,10 +1180,6 @@ impl crate::Context for ContextWgpuCore { .vertex .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: desc - .vertex - .compilation_options - .vertex_pulling_transform, }, buffers: Borrowed(&vertex_buffers), }, @@ -1198,7 +1194,6 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: frag .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, targets: Borrowed(frag.targets), }), @@ -1244,7 +1239,6 @@ impl crate::Context for ContextWgpuCore { zero_initialize_workgroup_memory: desc .compilation_options .zero_initialize_workgroup_memory, - vertex_pulling_transform: false, }, cache: desc.cache.map(|c| c.id.into()), }; diff --git a/wgpu/src/lib.rs b/wgpu/src/lib.rs index d895b696cf4..190e99f7073 100644 --- a/wgpu/src/lib.rs +++ b/wgpu/src/lib.rs @@ -2059,8 +2059,6 @@ pub struct PipelineCompilationOptions<'a> { /// This is required by the WebGPU spec, but may have overhead which can be avoided /// for cross-platform applications pub zero_initialize_workgroup_memory: bool, - /// Should the pipeline attempt to transform vertex shaders to use vertex pulling. - pub vertex_pulling_transform: bool, } impl<'a> Default for PipelineCompilationOptions<'a> { @@ -2074,7 +2072,6 @@ impl<'a> Default for PipelineCompilationOptions<'a> { Self { constants, zero_initialize_workgroup_memory: true, - vertex_pulling_transform: false, } } }