Skip to content

Commit

Permalink
[wgpu-core] fix length of copy in queue_write_texture #2
Browse files Browse the repository at this point in the history
Follow-up to 6f16ea4 & 7e112ca.
  • Loading branch information
teoxoy committed Jul 22, 2024
1 parent 101c996 commit 205f1e3
Show file tree
Hide file tree
Showing 3 changed files with 88 additions and 58 deletions.
45 changes: 43 additions & 2 deletions tests/tests/write_texture.rs
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ static WRITE_TEXTURE_SUBSET_2D: GpuTestConfiguration =
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
bytemuck::cast_slice(&data),
&data,
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(size),
Expand Down Expand Up @@ -127,7 +127,7 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
bytemuck::cast_slice(&data),
&data,
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(size),
Expand Down Expand Up @@ -191,3 +191,44 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration =
assert_eq!(*byte, 0);
}
});

#[gpu_test]
static WRITE_TEXTURE_NO_OOB: GpuTestConfiguration =
GpuTestConfiguration::new().run_async(|ctx| async move {
let size = 256;

let tex = ctx.device.create_texture(&wgpu::TextureDescriptor {
label: None,
dimension: wgpu::TextureDimension::D2,
size: wgpu::Extent3d {
width: size,
height: size,
depth_or_array_layers: 1,
},
format: wgpu::TextureFormat::R8Uint,
usage: wgpu::TextureUsages::COPY_DST,
mip_level_count: 1,
sample_count: 1,
view_formats: &[],
});
let data = vec![1u8; size as usize * 2 + 100]; // check that we don't attempt to copy OOB internally by adding 100 bytes here
ctx.queue.write_texture(
wgpu::ImageCopyTexture {
texture: &tex,
mip_level: 0,
origin: wgpu::Origin3d::ZERO,
aspect: wgpu::TextureAspect::All,
},
&data,
wgpu::ImageDataLayout {
offset: 0,
bytes_per_row: Some(size),
rows_per_image: Some(size),
},
wgpu::Extent3d {
width: size,
height: 2,
depth_or_array_layers: 1,
},
);
});
14 changes: 7 additions & 7 deletions wgpu-core/src/command/transfer.rs
Original file line number Diff line number Diff line change
Expand Up @@ -225,7 +225,7 @@ pub(crate) fn validate_linear_texture_data(
// the copy size before calling this function (for example via `validate_texture_copy_range`).
let copy_width = copy_size.width as BufferAddress;
let copy_height = copy_size.height as BufferAddress;
let copy_depth = copy_size.depth_or_array_layers as BufferAddress;
let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress;

let offset = layout.offset;

Expand Down Expand Up @@ -253,19 +253,19 @@ pub(crate) fn validate_linear_texture_data(
}
bytes_per_row
} else {
if copy_depth > 1 || height_in_blocks > 1 {
if depth_or_array_layers > 1 || height_in_blocks > 1 {
return Err(TransferError::UnspecifiedBytesPerRow);
}
0
};
let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image {
let rows_per_image = rows_per_image as BufferAddress;
if rows_per_image < height_in_blocks {
return Err(TransferError::InvalidRowsPerImage);
}
rows_per_image
} else {
if copy_depth > 1 {
if depth_or_array_layers > 1 {
return Err(TransferError::UnspecifiedRowsPerImage);
}
0
Expand All @@ -287,12 +287,12 @@ pub(crate) fn validate_linear_texture_data(
}
}

let bytes_per_image = bytes_per_row * block_rows_per_image;
let bytes_per_image = bytes_per_row * rows_per_image;

let required_bytes_in_copy = if copy_depth == 0 {
let required_bytes_in_copy = if depth_or_array_layers == 0 {
0
} else {
let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1);
let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1);
if height_in_blocks > 0 {
required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row;
}
Expand Down
87 changes: 38 additions & 49 deletions wgpu-core/src/device/queue.rs
Original file line number Diff line number Diff line change
Expand Up @@ -666,7 +666,7 @@ impl Global {

// Note: `_source_bytes_per_array_layer` is ignored since we
// have a staging copy, and it can have a different value.
let (_, _source_bytes_per_array_layer) = validate_linear_texture_data(
let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data(
data_layout,
dst.desc.format,
destination.aspect,
Expand All @@ -682,32 +682,6 @@ impl Global {
.map_err(TransferError::from)?;
}

let (block_width, block_height) = dst.desc.format.block_dimensions();
let width_blocks = size.width / block_width;
let height_blocks = size.height / block_height;

let block_rows_per_image = data_layout.rows_per_image.unwrap_or(
// doesn't really matter because we need this only if we copy
// more than one layer, and then we validate for this being not
// None
height_blocks,
);

let block_size = dst
.desc
.format
.block_copy_size(Some(destination.aspect))
.unwrap();
let bytes_per_row_alignment =
get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size);
let stage_bytes_per_row =
wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment);

let block_rows_in_copy =
(size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks;
let stage_size =
wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64).unwrap();

let mut pending_writes = device.pending_writes.lock();
let encoder = pending_writes.activate();

Expand Down Expand Up @@ -763,33 +737,47 @@ impl Global {

let dst_raw = dst.try_raw(&snatch_guard)?;

let bytes_per_row = data_layout
.bytes_per_row
.unwrap_or(width_blocks * block_size);
let (block_width, block_height) = dst.desc.format.block_dimensions();
let width_in_blocks = size.width / block_width;
let height_in_blocks = size.height / block_height;

let block_size = dst
.desc
.format
.block_copy_size(Some(destination.aspect))
.unwrap();
let bytes_in_last_row = width_in_blocks * block_size;

let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row);
let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks);

let bytes_per_row_alignment =
get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size);
let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment);

// Platform validation requires that the staging buffer always be
// freed, even if an error occurs. All paths from here must call
// `device.pending_writes.consume`.
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;

if stage_bytes_per_row == bytes_per_row {
let staging_buffer = if stage_bytes_per_row == bytes_per_row {
profiling::scope!("copy aligned");
// Fast path if the data is already being aligned optimally.
unsafe {
staging_buffer.write_with_offset(
data,
data_layout.offset as isize,
0,
(data.len() as u64 - data_layout.offset) as usize,
);
}
let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap();
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
staging_buffer.write(&data[data_layout.offset as usize..]);
staging_buffer
} else {
profiling::scope!("copy chunked");
// Copy row by row into the optimal alignment.
let block_rows_in_copy =
(size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks;
let stage_size =
wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64)
.unwrap();
let mut staging_buffer = StagingBuffer::new(device, stage_size)?;
let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize;
for layer in 0..size.depth_or_array_layers {
let rows_offset = layer * block_rows_per_image;
for row in rows_offset..rows_offset + height_blocks {
let rows_offset = layer * rows_per_image;
for row in rows_offset..rows_offset + height_in_blocks {
let src_offset = data_layout.offset as u32 + row * bytes_per_row;
let dst_offset = row * stage_bytes_per_row;
unsafe {
Expand All @@ -802,20 +790,21 @@ impl Global {
}
}
}
}
staging_buffer
};

let staging_buffer = staging_buffer.flush();

let regions = (0..array_layer_count).map(|rel_array_layer| {
let regions = (0..array_layer_count).map(|array_layer_offset| {
let mut texture_base = dst_base.clone();
texture_base.array_layer += rel_array_layer;
texture_base.array_layer += array_layer_offset;
hal::BufferTextureCopy {
buffer_layout: wgt::ImageDataLayout {
offset: rel_array_layer as u64
* block_rows_per_image as u64
offset: array_layer_offset as u64
* rows_per_image as u64
* stage_bytes_per_row as u64,
bytes_per_row: Some(stage_bytes_per_row),
rows_per_image: Some(block_rows_per_image),
rows_per_image: Some(rows_per_image),
},
texture_base,
size: hal_copy_size,
Expand Down

0 comments on commit 205f1e3

Please sign in to comment.