diff --git a/tests/tests/root.rs b/tests/tests/root.rs
index 1cb5b56c7c0..554a434ea1a 100644
--- a/tests/tests/root.rs
+++ b/tests/tests/root.rs
@@ -40,6 +40,7 @@ mod texture_bounds;
 mod texture_view_creation;
 mod transfer;
 mod vertex_indices;
+mod vertex_formats;
 mod write_texture;
 mod zero_init_texture_after_discard;
 
diff --git a/tests/tests/vertex_formats/draw.vert.wgsl b/tests/tests/vertex_formats/draw.vert.wgsl
new file mode 100644
index 00000000000..33546423231
--- /dev/null
+++ b/tests/tests/vertex_formats/draw.vert.wgsl
@@ -0,0 +1,221 @@
+struct EveryFormat {
+  // One location and attribute for each vertex format listed in
+  // https://gpuweb.github.io/gpuweb/#dictdef-gpuvertexattribute
+  // ordered so we have 4-byte aligned packing.
+  // NOTE(review): the f16 members presumably need `enable f16;` at module scope - TODO confirm.
+  // Start with all the formats that are already 4-byte aligned.
+  @location(0) uint8x4: vec4<u32>,
+  @location(1) sint8x4: vec4<i32>,
+  @location(2) unorm8x4: vec4<f32>,
+  @location(3) snorm8x4: vec4<f32>,
+  @location(4) uint16x2: vec2<u32>,
+  @location(5) uint16x4: vec4<u32>,
+  @location(6) sint16x2: vec2<i32>,
+  @location(7) sint16x4: vec4<i32>,
+  @location(8) unorm16x2: vec2<f32>,
+  @location(9) unorm16x4: vec4<f32>,
+  @location(10) snorm16x2: vec2<f32>,
+  @location(11) snorm16x4: vec4<f32>,
+  @location(12) float16x2: vec2<f16>,
+  @location(13) float16x4: vec4<f16>,
+  @location(14) float32: f32,
+  @location(15) float32x2: vec2<f32>,
+  @location(16) float32x3: vec3<f32>,
+  @location(17) float32x4: vec4<f32>,
+  @location(18) uint32: u32,
+  @location(19) uint32x2: vec2<u32>,
+  @location(20) uint32x3: vec3<u32>,
+  @location(21) uint32x4: vec4<u32>,
+  @location(22) sint32: i32,
+  @location(23) sint32x2: vec2<i32>,
+  @location(24) sint32x3: vec3<i32>,
+  @location(25) sint32x4: vec4<i32>,
+  @location(26) unorm10_10_10_2: vec4<f32>,
+
+  // Then the formats that are only 2-byte aligned; they come last to keep the packing above intact.
+  @location(27) uint8x2: vec2<u32>,
+  @location(28) sint8x2: vec2<i32>,
+  @location(29) unorm8x2: vec2<f32>,
+  @location(30) snorm8x2: vec2<f32>,
+}
+
+struct Checksums {  // One checksum per format family, returned by vertex_main.
+  @builtin(position) position: vec4<f32>,  // A vertex entry point must output a position.
+  @location(0) @interpolate(flat) uint: u32,  // Integer outputs must be flat-interpolated.
+  @location(1) @interpolate(flat) sint: i32,
+  @location(2) unorm: f32,
+  @location(3) snorm: f32,
+  @location(4) float16: f16,
+  @location(5) float32: f32,
+}
+// Folds every component of every attribute into one checksum per format family.
+@vertex
+fn vertex_main(v_in: EveryFormat) -> Checksums
+{
+  // Unsigned integer formats: every component is added to all_uint.
+  var all_uint: u32 = 0;
+  all_uint = accumulate_uint(all_uint, v_in.uint8x2.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x2.y);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.z);
+  all_uint = accumulate_uint(all_uint, v_in.uint8x4.w);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint16x2.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x2.y);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.z);
+  all_uint = accumulate_uint(all_uint, v_in.uint16x4.w);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32x2.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x2.y);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32x3.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x3.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x3.z);
+
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.x);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.y);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.z);
+  all_uint = accumulate_uint(all_uint, v_in.uint32x4.w);
+
+  // Signed integer formats: every component is added to all_sint.
+  var all_sint: i32 = 0;
+  all_sint = accumulate_sint(all_sint, v_in.sint8x2.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x2.y);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.z);
+  all_sint = accumulate_sint(all_sint, v_in.sint8x4.w);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint16x2.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x2.y);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.z);
+  all_sint = accumulate_sint(all_sint, v_in.sint16x4.w);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32x2.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x2.y);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32x3.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x3.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x3.z);
+
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.x);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.y);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.z);
+  all_sint = accumulate_sint(all_sint, v_in.sint32x4.w);
+
+  // Unsigned normalized formats: every component is added to all_unorm.
+  var all_unorm: f32 = 0.0;
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x2.y);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.y);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.z);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm8x4.w);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x2.y);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.y);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.z);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm16x4.w);
+
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.x);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.y);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.z);
+  all_unorm = accumulate_unorm(all_unorm, v_in.unorm10_10_10_2.w);
+
+  // Signed normalized formats: every component is added to all_snorm.
+  var all_snorm: f32 = 0.0;
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x2.y);
+
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.y);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.z);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm8x4.w);
+
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x2.y);
+
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.x);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.y);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.z);
+  all_snorm = accumulate_snorm(all_snorm, v_in.snorm16x4.w);
+
+  // Half-float formats: both float16x2 and float16x4 feed all_float16.
+  var all_float16: f16 = 0.0;
+  all_float16 = accumulate_float16(all_float16, v_in.float16x2.x);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x2.y);
+
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.x);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.y);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.z);
+  all_float16 = accumulate_float16(all_float16, v_in.float16x4.w);
+
+  // Single-precision float formats: every component is added to all_float32.
+  var all_float32: f32 = 0.0;
+  all_float32 = accumulate_float32(all_float32, v_in.float32);
+
+  all_float32 = accumulate_float32(all_float32, v_in.float32x2.x);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x2.y);
+
+  all_float32 = accumulate_float32(all_float32, v_in.float32x3.x);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x3.y);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x3.z);
+
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.x);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.y);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.z);
+  all_float32 = accumulate_float32(all_float32, v_in.float32x4.w);
+
+  // Copy the six totals into the output structure; members not assigned here stay zero-initialized.
+  var v_out: Checksums;
+
+  v_out.uint = all_uint;
+  v_out.sint = all_sint;
+  v_out.unorm = all_unorm;
+  v_out.snorm = all_snorm;
+  v_out.float16 = all_float16;
+  v_out.float32 = all_float32;
+
+  return v_out;
+}
+
+fn accumulate_uint(accum: u32, val: u32) -> u32 {
+  return accum + val;  // Fold one uint component into the running checksum.
+}
+
+fn accumulate_sint(accum: i32, val: i32) -> i32 {
+  return accum + val;  // Fold one sint component into the running checksum.
+}
+
+fn accumulate_unorm(accum: f32, val: f32) -> f32 {
+  return accum + val;  // Fold one unorm component into the running checksum.
+}
+
+fn accumulate_snorm(accum: f32, val: f32) -> f32 {
+  return accum + val;  // Fold one snorm component into the running checksum.
+}
+
+fn accumulate_float16(accum: f16, val: f16) -> f16 {
+  return accum + val;  // Fold one f16 component into the running checksum.
+}
+
+fn accumulate_float32(accum: f32, val: f32) -> f32 {
+  return accum + val;  // Fold one f32 component into the running checksum.
+}
diff --git a/tests/tests/vertex_formats/mod.rs b/tests/tests/vertex_formats/mod.rs
new file mode 100644
index 00000000000..2156a127fcb
--- /dev/null
+++ b/tests/tests/vertex_formats/mod.rs
@@ -0,0 +1,293 @@
+//! Tests that vertex formats pass through to vertex shaders accurately.
+
+use std::{num::NonZeroU64, ops::Range};
+
+use wgpu::util::{BufferInitDescriptor, DeviceExt, RenderEncoder};
+
+use wgpu_test::{gpu_test, GpuTestConfiguration, TestParameters, TestingContext};
+
+/// A single draw call, described by the vertex range it covers.
+struct Draw {
+    vertex: Range<u32>, // forwarded as the first argument of `draw` in `Draw::execute`
+}
+
+impl Draw {
+    /// Records the draw on `rpass` with one instance (`0..1`); an empty instance range draws nothing.
+    fn execute(&self, rpass: &mut dyn RenderEncoder<'_>) {
+        rpass.draw(self.vertex.clone(), 0..1);
+    }
+}
+
+#[derive(Debug, Copy, Clone)]
+enum TestCase {
+    /// One draw call covering a single vertex (`0..1`); see [`TestCase::draws`].
+    DrawOneVertex,
+}
+
+impl TestCase {
+    /// Every test case, in the order the test driver iterates over them.
+    const ARRAY: [Self; 1] = [Self::DrawOneVertex];
+
+    /// Returns the draw calls that make up this test case.
+    ///
+    /// Each arm borrows a literal array, which is why the slice can be `'static`.
+    fn draws(&self) -> &'static [Draw] {
+        match *self {
+            // A single draw over vertex range `0..1`.
+            Self::DrawOneVertex => &[Draw { vertex: 0..1 }],
+        }
+    }
+}
+
+struct Test {
+    case: TestCase, // selects both the draws to issue and the expected read-back values
+}
+
+impl Test {
+    /// Expected contents of the read-back buffer for this test's case.
+    ///
+    /// The slice holds six `f32` values, all of them zero.
+    fn expectation(&self) -> &'static [f32] {
+        const ONE_VERTEX: [f32; 6] = [0.0; 6];
+        match self.case {
+            TestCase::DrawOneVertex => &ONE_VERTEX,
+        }
+    }
+}
+/// Runs each [`TestCase`] on the all-formats pipeline and checks the data read back from the GPU.
+async fn vertex_index_common(ctx: TestingContext) {
+    let identity_buffer = ctx.device.create_buffer_init(&BufferInitDescriptor {
+        label: Some("identity buffer"),
+        contents: bytemuck::cast_slice(&[0u32, 1, 2, 3, 4, 5, 6, 7, 8]),
+        usage: wgpu::BufferUsages::VERTEX | wgpu::BufferUsages::INDEX,
+    });
+
+    let shader = ctx
+        .device
+        .create_shader_module(wgpu::include_wgsl!("draw.vert.wgsl"));
+
+    let bgl = ctx
+        .device
+        .create_bind_group_layout(&wgpu::BindGroupLayoutDescriptor {
+            label: None,
+            entries: &[wgpu::BindGroupLayoutEntry {
+                binding: 0,
+                ty: wgpu::BindingType::Buffer {
+                    ty: wgpu::BufferBindingType::Storage { read_only: false },
+                    has_dynamic_offset: false,
+                    min_binding_size: NonZeroU64::new(4),
+                },
+                visibility: wgpu::ShaderStages::VERTEX,
+                count: None,
+            }],
+        });
+
+    let ppl = ctx
+        .device
+        .create_pipeline_layout(&wgpu::PipelineLayoutDescriptor {
+            label: None,
+            bind_group_layouts: &[&bgl],
+            push_constant_ranges: &[],
+        });
+    // Shader locations 0..=30 mirror the members of `EveryFormat` in draw.vert.wgsl, in the same order.
+    let attributes = &wgpu::vertex_attr_array![
+        0 => Uint8x4,
+        1 => Sint8x4,
+        2 => Unorm8x4,
+        3 => Snorm8x4,
+        4 => Uint16x2,
+        5 => Uint16x4,
+        6 => Sint16x2,
+        7 => Sint16x4,
+        8 => Unorm16x2,
+        9 => Unorm16x4,
+        10 => Snorm16x2,
+        11 => Snorm16x4,
+        12 => Float16x2,
+        13 => Float16x4,
+        14 => Float32,
+        15 => Float32x2,
+        16 => Float32x3,
+        17 => Float32x4,
+        18 => Uint32,
+        19 => Uint32x2,
+        20 => Uint32x3,
+        21 => Uint32x4,
+        22 => Sint32,
+        23 => Sint32x2,
+        24 => Sint32x3,
+        25 => Sint32x4,
+        26 => Unorm10_10_10_2,
+        27 => Uint8x2,
+        28 => Sint8x2,
+        29 => Unorm8x2,
+        30 => Snorm8x2,
+    ];
+
+    let pipeline_desc = wgpu::RenderPipelineDescriptor {
+        label: None,
+        layout: Some(&ppl),
+        vertex: wgpu::VertexState {
+            buffers: &[
+                wgpu::VertexBufferLayout {
+                    array_stride: 0,  // NOTE(review): placeholder - the real stride still has to be calculated.
+                    step_mode: wgpu::VertexStepMode::Vertex,
+                    attributes,
+                },
+            ],
+            module: &shader,
+            entry_point: "vertex_main",
+            compilation_options: Default::default(),
+        },
+        primitive: wgpu::PrimitiveState::default(),
+        depth_stencil: None,
+        multisample: wgpu::MultisampleState::default(),
+        fragment: None,
+        multiview: None,
+        cache: None,
+    };
+
+    let buffer_pipeline = ctx.device.create_render_pipeline(&pipeline_desc);
+
+    let mut tests = Vec::with_capacity(1);
+    for case in TestCase::ARRAY {
+        tests.push(Test {
+            case,
+        })
+    }
+
+    let mut failed = false;
+    for test in tests {
+        let pipeline = &buffer_pipeline;
+
+        let expected = test.expectation();
+
+        let buffer_size = 4 * expected.len() as u64; // one 4-byte slot per expected f32
+        let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: buffer_size,
+            usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
+            mapped_at_creation: false,
+        });
+
+        let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
+            label: None,
+            size: buffer_size,
+            usage: wgpu::BufferUsages::COPY_SRC | wgpu::BufferUsages::STORAGE,
+            mapped_at_creation: false,
+        });
+        // NOTE(review): draw.vert.wgsl declares no @group/@binding resource, so presumably nothing writes gpu_buffer yet - confirm.
+        let bg = ctx.device.create_bind_group(&wgpu::BindGroupDescriptor {
+            label: None,
+            layout: &bgl,
+            entries: &[wgpu::BindGroupEntry {
+                binding: 0,
+                resource: gpu_buffer.as_entire_binding(),
+            }],
+        });
+
+        let mut encoder1 = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        let mut rpass = encoder1.begin_render_pass(&wgpu::RenderPassDescriptor {
+            label: None,
+            color_attachments: &[None],
+            depth_stencil_attachment: None,
+            timestamp_writes: None,
+            occlusion_query_set: None,
+        });
+
+        {
+            let render_encoder = &mut rpass;
+            // NOTE(review): the pipeline declares one vertex buffer layout, yet slots 0..=30 are bound - confirm slots 1+ are needed.
+            render_encoder.set_vertex_buffer(0, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(1, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(2, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(3, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(4, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(5, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(6, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(7, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(8, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(9, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(10, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(11, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(12, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(13, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(14, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(15, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(16, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(17, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(18, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(19, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(20, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(21, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(22, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(23, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(24, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(25, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(26, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(27, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(28, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(29, identity_buffer.slice(..));
+            render_encoder.set_vertex_buffer(30, identity_buffer.slice(..));
+
+            render_encoder.set_pipeline(pipeline);
+            render_encoder.set_bind_group(0, &bg, &[]);
+
+            let draws = test.case.draws();
+
+            for draw in draws {
+                draw.execute(render_encoder);
+            }
+        }
+
+        drop(rpass);
+
+        let mut encoder2 = ctx
+            .device
+            .create_command_encoder(&wgpu::CommandEncoderDescriptor::default());
+
+        encoder2.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, buffer_size);
+
+        // See https://github.com/gfx-rs/wgpu/issues/4732 for why this is split between two submissions
+        // with a hard wait in between.
+        ctx.queue.submit([encoder1.finish()]);
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+        ctx.queue.submit([encoder2.finish()]);
+        let slice = cpu_buffer.slice(..);
+        slice.map_async(wgpu::MapMode::Read, |_| ()); // the mapping result is discarded by this callback
+        ctx.async_poll(wgpu::Maintain::wait())
+            .await
+            .panic_on_timeout();
+        let data: Vec<f32> = bytemuck::cast_slice(&slice.get_mapped_range()).to_vec();
+
+        let case_name = format!(
+            "Case {:?}",
+            test.case
+        );
+        if data != expected {
+            eprintln!(
+                "Failed: Got: {:?} Expected: {:?} - {case_name}",
+                data, expected,
+            );
+            failed = true;
+        } else {
+            eprintln!("Passed: {case_name}");
+        }
+    }
+
+    assert!(!failed);
+}
+
+#[gpu_test]
+static VERTEX_FORMATS: GpuTestConfiguration = GpuTestConfiguration::new()
+    .parameters(
+        TestParameters::default()
+            .test_features_limits()
+            .features(wgpu::Features::VERTEX_WRITABLE_STORAGE) // the bind group layout exposes a writable storage buffer to the vertex stage
+    )
+    .run_async(vertex_index_common); // the async test body is `vertex_index_common`