Skip to content

Commit

Permalink
Workaround NV bug
Browse files Browse the repository at this point in the history
  • Loading branch information
cwfitzgerald committed Sep 13, 2023
1 parent 7fea9e9 commit 7710f79
Show file tree
Hide file tree
Showing 5 changed files with 172 additions and 9 deletions.
123 changes: 123 additions & 0 deletions tests/tests/regression/issue_4122.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
use std::{num::NonZeroU64, ops::Range};

use wasm_bindgen_test::wasm_bindgen_test;
use wgpu_test::{initialize_test, TestParameters, TestingContext};

fn fill_test(ctx: &TestingContext, range: Range<u64>, size: u64) -> bool {
let gpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("gpu_buffer"),
size,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::COPY_SRC,
mapped_at_creation: false,
});

let cpu_buffer = ctx.device.create_buffer(&wgpu::BufferDescriptor {
label: Some("cpu_buffer"),
size,
usage: wgpu::BufferUsages::COPY_DST | wgpu::BufferUsages::MAP_READ,
mapped_at_creation: false,
});

// Initialize the whole buffer with values.
let buffer_contents = vec![0xFF_u8; size as usize];
ctx.queue.write_buffer(&gpu_buffer, 0, &buffer_contents);

let mut encoder = ctx
.device
.create_command_encoder(&wgpu::CommandEncoderDescriptor {
label: Some("encoder"),
});

encoder.clear_buffer(
&gpu_buffer,
range.start,
NonZeroU64::new(range.end - range.start),
);
encoder.copy_buffer_to_buffer(&gpu_buffer, 0, &cpu_buffer, 0, size);

ctx.queue.submit(Some(encoder.finish()));
cpu_buffer.slice(..).map_async(wgpu::MapMode::Read, |_| ());
ctx.device.poll(wgpu::Maintain::Wait);

let buffer_slice = cpu_buffer.slice(..);
let buffer_data = buffer_slice.get_mapped_range();

let first_clear_byte = buffer_data
.iter()
.enumerate()
.find_map(|(index, byte)| (*byte == 0x00).then_some(index))
.expect("No clear happened at all");

let first_dirty_byte = buffer_data
.iter()
.enumerate()
.skip(first_clear_byte)
.find_map(|(index, byte)| (*byte != 0x00).then_some(index))
.unwrap_or(size as usize);

let second_clear_byte = buffer_data
.iter()
.enumerate()
.skip(first_dirty_byte)
.find_map(|(index, byte)| (*byte == 0x00).then_some(index));

if second_clear_byte.is_some() {
eprintln!("Found multiple cleared ranges instead of a single clear range of {}..{} on a buffer of size {}.", range.start, range.end, size);
return false;
}

let cleared_range = first_clear_byte as u64..first_dirty_byte as u64;

if cleared_range != range {
eprintln!(
"Cleared range is {}..{}, but the clear range is {}..{} on a buffer of size {}.",
cleared_range.start, cleared_range.end, range.start, range.end, size
);
return false;
}

eprintln!(
"Cleared range is {}..{} on a buffer of size {}.",
cleared_range.start, cleared_range.end, size
);

return true;
}

#[wasm_bindgen_test]
#[test]
fn clear_buffer_bug() {
initialize_test(TestParameters::default(), |ctx| {
let mut succeeded = true;
for power in 4..16 {
let size = 1 << power;
for start_offset in (0..=68).step_by(4) {
for size_offset in (0..=68).step_by(4) {
let range = start_offset..size + size_offset + start_offset;
let result = fill_test(&ctx, range.clone(), 1 << 16);

let copy_length = range.end - range.start;

// This is the precondiiton for the nvidia bug. This is for reference only,
// as the bug is worked around.
let _succeeds = 'b: {
if copy_length >= 4096 {
if start_offset % 16 != 0 {
if copy_length == 4096 {
break 'b true;
}
if copy_length % 16 == 0 {
break 'b false;
}
}
}
true
};

succeeded &= result;
}
}
}
assert!(succeeded);
});
}
1 change: 1 addition & 0 deletions tests/tests/root.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use wasm_bindgen_test::wasm_bindgen_test_configure;
mod regression {
mod issue_3457;
mod issue_4024;
mod issue_4122;
}

mod bind_group_layout_dedup;
Expand Down
4 changes: 4 additions & 0 deletions wgpu-hal/src/vulkan/adapter.rs
Original file line number Diff line number Diff line change
Expand Up @@ -984,6 +984,10 @@ impl super::Instance {
super::Workarounds::EMPTY_RESOLVE_ATTACHMENT_LISTS,
phd_capabilities.properties.vendor_id == db::qualcomm::VENDOR,
);
workarounds.set(
super::Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16,
phd_capabilities.properties.vendor_id == db::nvidia::VENDOR,
);
};

if phd_capabilities.effective_api_version == vk::API_VERSION_1_0
Expand Down
47 changes: 38 additions & 9 deletions wgpu-hal/src/vulkan/command.rs
Original file line number Diff line number Diff line change
Expand Up @@ -212,15 +212,44 @@ impl crate::CommandEncoder<super::Api> for super::CommandEncoder {
}

unsafe fn clear_buffer(&mut self, buffer: &super::Buffer, range: crate::MemoryRange) {
unsafe {
self.device.raw.cmd_fill_buffer(
self.active,
buffer.raw,
range.start,
range.end - range.start,
0,
)
};
let range_size = range.end - range.start;
if self.device.workarounds.contains(
super::Workarounds::FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16,
) && range_size >= 4096
&& range.start % 16 != 0
{
let rounded_start = wgt::math::align_to(range.start, 16);
let prefix_size = rounded_start - range.start;

unsafe {
self.device.raw.cmd_fill_buffer(
self.active,
buffer.raw,
range.start,
prefix_size,
0,
)
};

// This will never be zero, as rounding can only add up to 12 bytes, and the total size is 4096.
let suffix_size = range.end - rounded_start;

unsafe {
self.device.raw.cmd_fill_buffer(
self.active,
buffer.raw,
rounded_start,
suffix_size,
0,
)
};
} else {
unsafe {
self.device
.raw
.cmd_fill_buffer(self.active, buffer.raw, range.start, range_size, 0)
};
}
}

unsafe fn copy_buffer_to_buffer<T>(
Expand Down
6 changes: 6 additions & 0 deletions wgpu-hal/src/vulkan/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -207,6 +207,12 @@ bitflags::bitflags!(
/// Qualcomm OOMs when there are zero color attachments but a non-null pointer
/// to a subpass resolve attachment array. This nulls out that pointer in that case.
const EMPTY_RESOLVE_ATTACHMENT_LISTS = 0x2;
/// See issue_4122.rs for details on this nvidia issue.
///
/// All fill buffers with offsets not aligned to 16 bytes and size more than 4096 bytes
/// will not fill the correct range. Workaround this by splitting the fill into multiple
/// calls, one with small size and unaligned offset, and one with aligned offset and large size.
const FORCE_FILL_BUFFER_WITH_SIZE_GREATER_4096_ALIGNED_OFFSET_16 = 0x4;
}
);

Expand Down

0 comments on commit 7710f79

Please sign in to comment.