From 205f1e3ab60a4c8c6b6f901803eb9cfc3a5b62f3 Mon Sep 17 00:00:00 2001 From: teoxoy <28601907+teoxoy@users.noreply.github.com> Date: Mon, 22 Jul 2024 11:21:06 +0200 Subject: [PATCH] [wgpu-core] fix length of copy in `queue_write_texture` #2 Follow-up to 6f16ea460ab437173e14d2f5f3584ca7e1c9841d & 7e112ca4c0686c9626be4c8ddd113e954a2dab21. --- tests/tests/write_texture.rs | 45 +++++++++++++++- wgpu-core/src/command/transfer.rs | 14 ++--- wgpu-core/src/device/queue.rs | 87 ++++++++++++++----------------- 3 files changed, 88 insertions(+), 58 deletions(-) diff --git a/tests/tests/write_texture.rs b/tests/tests/write_texture.rs index f8d99d6d14..fbb0485918 100644 --- a/tests/tests/write_texture.rs +++ b/tests/tests/write_texture.rs @@ -32,7 +32,7 @@ static WRITE_TEXTURE_SUBSET_2D: GpuTestConfiguration = origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - bytemuck::cast_slice(&data), + &data, wgpu::ImageDataLayout { offset: 0, bytes_per_row: Some(size), @@ -127,7 +127,7 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration = origin: wgpu::Origin3d::ZERO, aspect: wgpu::TextureAspect::All, }, - bytemuck::cast_slice(&data), + &data, wgpu::ImageDataLayout { offset: 0, bytes_per_row: Some(size), @@ -191,3 +191,44 @@ static WRITE_TEXTURE_SUBSET_3D: GpuTestConfiguration = assert_eq!(*byte, 0); } }); + +#[gpu_test] +static WRITE_TEXTURE_NO_OOB: GpuTestConfiguration = + GpuTestConfiguration::new().run_async(|ctx| async move { + let size = 256; + + let tex = ctx.device.create_texture(&wgpu::TextureDescriptor { + label: None, + dimension: wgpu::TextureDimension::D2, + size: wgpu::Extent3d { + width: size, + height: size, + depth_or_array_layers: 1, + }, + format: wgpu::TextureFormat::R8Uint, + usage: wgpu::TextureUsages::COPY_DST, + mip_level_count: 1, + sample_count: 1, + view_formats: &[], + }); + let data = vec![1u8; size as usize * 2 + 100]; // check that we don't attempt to copy OOB internally by adding 100 bytes here + ctx.queue.write_texture( + wgpu::ImageCopyTexture { + texture: &tex, + mip_level: 0, + origin: wgpu::Origin3d::ZERO, + aspect: wgpu::TextureAspect::All, + }, + &data, + wgpu::ImageDataLayout { + offset: 0, + bytes_per_row: Some(size), + rows_per_image: Some(size), + }, + wgpu::Extent3d { + width: size, + height: 2, + depth_or_array_layers: 1, + }, + ); + }); diff --git a/wgpu-core/src/command/transfer.rs b/wgpu-core/src/command/transfer.rs index 4379777eb5..0e4c21f999 100644 --- a/wgpu-core/src/command/transfer.rs +++ b/wgpu-core/src/command/transfer.rs @@ -225,7 +225,7 @@ pub(crate) fn validate_linear_texture_data( // the copy size before calling this function (for example via `validate_texture_copy_range`). let copy_width = copy_size.width as BufferAddress; let copy_height = copy_size.height as BufferAddress; - let copy_depth = copy_size.depth_or_array_layers as BufferAddress; + let depth_or_array_layers = copy_size.depth_or_array_layers as BufferAddress; let offset = layout.offset; @@ -253,19 +253,19 @@ pub(crate) fn validate_linear_texture_data( } bytes_per_row } else { - if copy_depth > 1 || height_in_blocks > 1 { + if depth_or_array_layers > 1 || height_in_blocks > 1 { return Err(TransferError::UnspecifiedBytesPerRow); } 0 }; - let block_rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { + let rows_per_image = if let Some(rows_per_image) = layout.rows_per_image { let rows_per_image = rows_per_image as BufferAddress; if rows_per_image < height_in_blocks { return Err(TransferError::InvalidRowsPerImage); } rows_per_image } else { - if copy_depth > 1 { + if depth_or_array_layers > 1 { return Err(TransferError::UnspecifiedRowsPerImage); } 0 @@ -287,12 +287,12 @@ pub(crate) fn validate_linear_texture_data( } } - let bytes_per_image = bytes_per_row * block_rows_per_image; + let bytes_per_image = bytes_per_row * rows_per_image; - let required_bytes_in_copy = if copy_depth == 0 { + let required_bytes_in_copy = if depth_or_array_layers == 0 { 0 } else { - let mut required_bytes_in_copy = bytes_per_image * (copy_depth - 1); + let mut required_bytes_in_copy = bytes_per_image * (depth_or_array_layers - 1); if height_in_blocks > 0 { required_bytes_in_copy += bytes_per_row * (height_in_blocks - 1) + bytes_in_last_row; } diff --git a/wgpu-core/src/device/queue.rs b/wgpu-core/src/device/queue.rs index 220085f8f7..833d6c2c95 100644 --- a/wgpu-core/src/device/queue.rs +++ b/wgpu-core/src/device/queue.rs @@ -666,7 +666,7 @@ impl Global { // Note: `_source_bytes_per_array_layer` is ignored since we // have a staging copy, and it can have a different value. - let (_, _source_bytes_per_array_layer) = validate_linear_texture_data( + let (required_bytes_in_copy, _source_bytes_per_array_layer) = validate_linear_texture_data( data_layout, dst.desc.format, destination.aspect, @@ -682,32 +682,6 @@ impl Global { .map_err(TransferError::from)?; } - let (block_width, block_height) = dst.desc.format.block_dimensions(); - let width_blocks = size.width / block_width; - let height_blocks = size.height / block_height; - - let block_rows_per_image = data_layout.rows_per_image.unwrap_or( - // doesn't really matter because we need this only if we copy - // more than one layer, and then we validate for this being not - // None - height_blocks, - ); - - let block_size = dst - .desc - .format - .block_copy_size(Some(destination.aspect)) - .unwrap(); - let bytes_per_row_alignment = - get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); - let stage_bytes_per_row = - wgt::math::align_to(block_size * width_blocks, bytes_per_row_alignment); - - let block_rows_in_copy = - (size.depth_or_array_layers - 1) * block_rows_per_image + height_blocks; - let stage_size = - wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64).unwrap(); - let mut pending_writes = device.pending_writes.lock(); let encoder = pending_writes.activate(); @@ -763,33 +737,47 @@ impl Global { let dst_raw = dst.try_raw(&snatch_guard)?; - let bytes_per_row = data_layout - .bytes_per_row - .unwrap_or(width_blocks * block_size); + let (block_width, block_height) = dst.desc.format.block_dimensions(); + let width_in_blocks = size.width / block_width; + let height_in_blocks = size.height / block_height; + + let block_size = dst + .desc + .format + .block_copy_size(Some(destination.aspect)) + .unwrap(); + let bytes_in_last_row = width_in_blocks * block_size; + + let bytes_per_row = data_layout.bytes_per_row.unwrap_or(bytes_in_last_row); + let rows_per_image = data_layout.rows_per_image.unwrap_or(height_in_blocks); + + let bytes_per_row_alignment = + get_lowest_common_denom(device.alignments.buffer_copy_pitch.get() as u32, block_size); + let stage_bytes_per_row = wgt::math::align_to(bytes_in_last_row, bytes_per_row_alignment); // Platform validation requires that the staging buffer always be // freed, even if an error occurs. All paths from here must call // `device.pending_writes.consume`. - let mut staging_buffer = StagingBuffer::new(device, stage_size)?; - - if stage_bytes_per_row == bytes_per_row { + let staging_buffer = if stage_bytes_per_row == bytes_per_row { profiling::scope!("copy aligned"); // Fast path if the data is already being aligned optimally. - unsafe { - staging_buffer.write_with_offset( - data, - data_layout.offset as isize, - 0, - (data.len() as u64 - data_layout.offset) as usize, - ); - } + let stage_size = wgt::BufferSize::new(required_bytes_in_copy).unwrap(); + let mut staging_buffer = StagingBuffer::new(device, stage_size)?; + staging_buffer.write(&data[data_layout.offset as usize..]); + staging_buffer } else { profiling::scope!("copy chunked"); // Copy row by row into the optimal alignment. + let block_rows_in_copy = + (size.depth_or_array_layers - 1) * rows_per_image + height_in_blocks; + let stage_size = + wgt::BufferSize::new(stage_bytes_per_row as u64 * block_rows_in_copy as u64) + .unwrap(); + let mut staging_buffer = StagingBuffer::new(device, stage_size)?; let copy_bytes_per_row = stage_bytes_per_row.min(bytes_per_row) as usize; for layer in 0..size.depth_or_array_layers { - let rows_offset = layer * block_rows_per_image; - for row in rows_offset..rows_offset + height_blocks { + let rows_offset = layer * rows_per_image; + for row in rows_offset..rows_offset + height_in_blocks { let src_offset = data_layout.offset as u32 + row * bytes_per_row; let dst_offset = row * stage_bytes_per_row; unsafe { @@ -802,20 +790,21 @@ impl Global { } } } - } + staging_buffer + }; let staging_buffer = staging_buffer.flush(); - let regions = (0..array_layer_count).map(|rel_array_layer| { + let regions = (0..array_layer_count).map(|array_layer_offset| { let mut texture_base = dst_base.clone(); - texture_base.array_layer += rel_array_layer; + texture_base.array_layer += array_layer_offset; hal::BufferTextureCopy { buffer_layout: wgt::ImageDataLayout { - offset: rel_array_layer as u64 - * block_rows_per_image as u64 + offset: array_layer_offset as u64 + * rows_per_image as u64 * stage_bytes_per_row as u64, bytes_per_row: Some(stage_bytes_per_row), - rows_per_image: Some(block_rows_per_image), + rows_per_image: Some(rows_per_image), }, texture_base, size: hal_copy_size,