Skip to content

Commit

Permalink
Fix the example regressions from packed growable buffers. (#14375)
Browse files Browse the repository at this point in the history
The "uberbuffers" PR #14257 caused some examples to fail intermittently
for different reasons:

1. `morph_targets` could fail because vertex displacements for morph
targets are keyed off the vertex index. With buffer packing, the vertex
index can vary based on the position in the buffer, which caused the
morph targets to be potentially incorrect. The solution is to include
the first vertex index with the `MeshUniform` (and `MeshInputUniform` if
GPU preprocessing is in use), so that the shader can calculate the true
vertex index before performing the morph operation. This results in
wasted space in `MeshUniform`, which is unfortunate, but we'll soon be
filling in the padding with the ID of the material when bindless
textures land, so this had to happen sooner or later anyhow.

Including the vertex index in the `MeshInputUniform` caused an ordering
problem. The `MeshInputUniform` was created during the extraction phase,
before the allocations occurred, so the extraction logic didn't know
where the mesh vertex data was going to end up. The solution is to move
the `MeshInputUniform` creation (the `collect_meshes_for_gpu_building`
system) to after the allocations phase. This should be better for
parallelism anyhow, because it allows the extraction phase to finish
quicker. It's also something we'll have to do for bindless in any event.

2. The `lines` and `fog_volumes` examples could fail because their
custom drawing nodes weren't updated to supply the vertex and index
offsets in their `draw_indexed` and `draw` calls. This commit fixes this
oversight.

Fixes #14366.
  • Loading branch information
pcwalton authored Jul 22, 2024
1 parent d30391b commit d235d41
Show file tree
Hide file tree
Showing 7 changed files with 124 additions and 38 deletions.
2 changes: 1 addition & 1 deletion crates/bevy_pbr/src/meshlet/gpu_scene.rs
Original file line number Diff line number Diff line change
Expand Up @@ -138,7 +138,7 @@ pub fn extract_meshlet_meshes(
gpu_scene
.instance_uniforms
.get_mut()
.push(MeshUniform::new(&transforms, None));
.push(MeshUniform::new(&transforms, 0, None));
}
}

Expand Down
10 changes: 7 additions & 3 deletions crates/bevy_pbr/src/prepass/prepass.wgsl
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
#import bevy_pbr::{
prepass_bindings,
mesh_bindings::mesh,
mesh_functions,
prepass_io::{Vertex, VertexOutput, FragmentOutput},
skinning,
Expand All @@ -15,18 +16,21 @@
#ifdef MORPH_TARGETS
fn morph_vertex(vertex_in: Vertex) -> Vertex {
var vertex = vertex_in;
let first_vertex = mesh[vertex.instance_index].first_vertex_index;
let vertex_index = vertex.index - first_vertex;

let weight_count = morph::layer_count();
for (var i: u32 = 0u; i < weight_count; i ++) {
let weight = morph::weight_at(i);
if weight == 0.0 {
continue;
}
vertex.position += weight * morph::morph(vertex.index, morph::position_offset, i);
vertex.position += weight * morph::morph(vertex_index, morph::position_offset, i);
#ifdef VERTEX_NORMALS
vertex.normal += weight * morph::morph(vertex.index, morph::normal_offset, i);
vertex.normal += weight * morph::morph(vertex_index, morph::normal_offset, i);
#endif
#ifdef VERTEX_TANGENTS
vertex.tangent += vec4(weight * morph::morph(vertex.index, morph::tangent_offset, i), 0.0);
vertex.tangent += vec4(weight * morph::morph(vertex_index, morph::tangent_offset, i), 0.0);
#endif
}
return vertex;
Expand Down
122 changes: 93 additions & 29 deletions crates/bevy_pbr/src/render/mesh.rs
Original file line number Diff line number Diff line change
Expand Up @@ -186,8 +186,8 @@ impl Plugin for MeshRenderPlugin {

if use_gpu_instance_buffer_builder {
render_app
.init_resource::<gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>(
)
.init_resource::<gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>>()
.init_resource::<RenderMeshInstanceGpuQueues>()
.add_systems(
ExtractSchedule,
extract_meshes_for_gpu_building.in_set(ExtractMeshesSet),
Expand All @@ -200,6 +200,9 @@ impl Plugin for MeshRenderPlugin {
gpu_preprocessing::delete_old_work_item_buffers::<MeshPipeline>
.in_set(RenderSet::ManageViews)
.after(prepare_view_targets),
collect_meshes_for_gpu_building
.in_set(RenderSet::PrepareAssets)
.after(allocator::allocate_and_free_meshes),
),
);
} else {
Expand Down Expand Up @@ -275,6 +278,18 @@ pub struct MeshUniform {
//
// (MSB: most significant bit; LSB: least significant bit.)
pub lightmap_uv_rect: UVec2,
/// The index of this mesh's first vertex in the vertex buffer.
///
/// Multiple meshes can be packed into a single vertex buffer (see
/// [`MeshAllocator`]). This value stores the offset of the first vertex in
/// this mesh in that buffer.
pub first_vertex_index: u32,
/// Padding.
pub pad_a: u32,
/// Padding.
pub pad_b: u32,
/// Padding.
pub pad_c: u32,
}

/// Information that has to be transferred from CPU to GPU in order to produce
Expand Down Expand Up @@ -305,6 +320,18 @@ pub struct MeshInputUniform {
///
/// This is used for TAA. If not present, this will be `u32::MAX`.
pub previous_input_index: u32,
/// The index of this mesh's first vertex in the vertex buffer.
///
/// Multiple meshes can be packed into a single vertex buffer (see
/// [`MeshAllocator`]). This value stores the offset of the first vertex in
/// this mesh in that buffer.
pub first_vertex_index: u32,
/// Padding.
pub pad_a: u32,
/// Padding.
pub pad_b: u32,
/// Padding.
pub pad_c: u32,
}

/// Information about each mesh instance needed to cull it on GPU.
Expand All @@ -331,7 +358,11 @@ pub struct MeshCullingData {
pub struct MeshCullingDataBuffer(RawBufferVec<MeshCullingData>);

impl MeshUniform {
pub fn new(mesh_transforms: &MeshTransforms, maybe_lightmap_uv_rect: Option<Rect>) -> Self {
pub fn new(
mesh_transforms: &MeshTransforms,
first_vertex_index: u32,
maybe_lightmap_uv_rect: Option<Rect>,
) -> Self {
let (local_from_world_transpose_a, local_from_world_transpose_b) =
mesh_transforms.world_from_local.inverse_transpose_3x3();
Self {
Expand All @@ -341,6 +372,10 @@ impl MeshUniform {
local_from_world_transpose_a,
local_from_world_transpose_b,
flags: mesh_transforms.flags,
first_vertex_index,
pad_a: 0,
pad_b: 0,
pad_c: 0,
}
}
}
Expand Down Expand Up @@ -515,6 +550,14 @@ pub enum RenderMeshInstanceGpuQueue {
GpuCulling(Vec<(Entity, RenderMeshInstanceGpuBuilder, MeshCullingData)>),
}

/// The per-thread queues containing mesh instances, populated during the
/// extract phase.
///
/// These are filled in [`extract_meshes_for_gpu_building`] and consumed in
/// [`collect_meshes_for_gpu_building`].
#[derive(Resource, Default, Deref, DerefMut)]
pub struct RenderMeshInstanceGpuQueues(Parallel<RenderMeshInstanceGpuQueue>);

impl RenderMeshInstanceShared {
fn from_components(
previous_transform: Option<&PreviousGlobalTransform>,
Expand Down Expand Up @@ -719,7 +762,14 @@ impl RenderMeshInstanceGpuBuilder {
entity: Entity,
render_mesh_instances: &mut EntityHashMap<RenderMeshInstanceGpu>,
current_input_buffer: &mut RawBufferVec<MeshInputUniform>,
mesh_allocator: &MeshAllocator,
) -> usize {
let first_vertex_index = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id)
{
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};

// Push the mesh input uniform.
let current_uniform_index = current_input_buffer.push(MeshInputUniform {
world_from_local: self.world_from_local.to_transpose(),
Expand All @@ -729,6 +779,10 @@ impl RenderMeshInstanceGpuBuilder {
Some(previous_input_index) => previous_input_index.into(),
None => u32::MAX,
},
first_vertex_index,
pad_a: 0,
pad_b: 0,
pad_c: 0,
});

// Record the [`RenderMeshInstance`].
Expand Down Expand Up @@ -900,11 +954,7 @@ pub fn extract_meshes_for_cpu_building(
pub fn extract_meshes_for_gpu_building(
mut render_mesh_instances: ResMut<RenderMeshInstances>,
render_visibility_ranges: Res<RenderVisibilityRanges>,
mut batched_instance_buffers: ResMut<
gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
>,
mut mesh_culling_data_buffer: ResMut<MeshCullingDataBuffer>,
mut render_mesh_instance_queues: Local<Parallel<RenderMeshInstanceGpuQueue>>,
mut render_mesh_instance_queues: ResMut<RenderMeshInstanceGpuQueues>,
meshes_query: Extract<
Query<(
Entity,
Expand Down Expand Up @@ -1004,13 +1054,6 @@ pub fn extract_meshes_for_gpu_building(
queue.push(entity, gpu_mesh_instance_builder, gpu_mesh_culling_data);
},
);

collect_meshes_for_gpu_building(
render_mesh_instances,
&mut batched_instance_buffers,
&mut mesh_culling_data_buffer,
&mut render_mesh_instance_queues,
);
}

/// A system that sets the [`RenderMeshInstanceFlags`] for each mesh based on
Expand Down Expand Up @@ -1044,22 +1087,28 @@ fn set_mesh_motion_vector_flags(

/// Creates the [`RenderMeshInstanceGpu`]s and [`MeshInputUniform`]s when GPU
/// mesh uniforms are built.
fn collect_meshes_for_gpu_building(
render_mesh_instances: &mut RenderMeshInstancesGpu,
batched_instance_buffers: &mut gpu_preprocessing::BatchedInstanceBuffers<
MeshUniform,
MeshInputUniform,
pub fn collect_meshes_for_gpu_building(
render_mesh_instances: ResMut<RenderMeshInstances>,
batched_instance_buffers: ResMut<
gpu_preprocessing::BatchedInstanceBuffers<MeshUniform, MeshInputUniform>,
>,
mesh_culling_data_buffer: &mut MeshCullingDataBuffer,
render_mesh_instance_queues: &mut Parallel<RenderMeshInstanceGpuQueue>,
mut mesh_culling_data_buffer: ResMut<MeshCullingDataBuffer>,
mut render_mesh_instance_queues: ResMut<RenderMeshInstanceGpuQueues>,
mesh_allocator: Res<MeshAllocator>,
) {
let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) =
render_mesh_instances.into_inner()
else {
return;
};

// Collect render mesh instances. Build up the uniform buffer.

let gpu_preprocessing::BatchedInstanceBuffers {
ref mut current_input_buffer,
ref mut previous_input_buffer,
..
} = batched_instance_buffers;
} = batched_instance_buffers.into_inner();

// Swap buffers.
mem::swap(current_input_buffer, previous_input_buffer);
Expand All @@ -1076,19 +1125,22 @@ fn collect_meshes_for_gpu_building(
for (entity, mesh_instance_builder) in queue.drain(..) {
mesh_instance_builder.add_to(
entity,
render_mesh_instances,
&mut *render_mesh_instances,
current_input_buffer,
&mesh_allocator,
);
}
}
RenderMeshInstanceGpuQueue::GpuCulling(ref mut queue) => {
for (entity, mesh_instance_builder, mesh_culling_builder) in queue.drain(..) {
let instance_data_index = mesh_instance_builder.add_to(
entity,
render_mesh_instances,
&mut *render_mesh_instances,
current_input_buffer,
&mesh_allocator,
);
let culling_data_index = mesh_culling_builder.add_to(mesh_culling_data_buffer);
let culling_data_index =
mesh_culling_builder.add_to(&mut mesh_culling_data_buffer);
debug_assert_eq!(instance_data_index, culling_data_index);
}
}
Expand Down Expand Up @@ -1220,7 +1272,7 @@ impl GetBatchData for MeshPipeline {
type BufferData = MeshUniform;

fn get_batch_data(
(mesh_instances, lightmaps, _, _): &SystemParamItem<Self::Param>,
(mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem<Self::Param>,
entity: Entity,
) -> Option<(Self::BufferData, Option<Self::CompareData>)> {
let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
Expand All @@ -1231,11 +1283,17 @@ impl GetBatchData for MeshPipeline {
return None;
};
let mesh_instance = mesh_instances.get(&entity)?;
let first_vertex_index =
match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) {
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};
let maybe_lightmap = lightmaps.render_lightmaps.get(&entity);

Some((
MeshUniform::new(
&mesh_instance.transforms,
first_vertex_index,
maybe_lightmap.map(|lightmap| lightmap.uv_rect),
),
mesh_instance.should_batch().then_some((
Expand Down Expand Up @@ -1277,7 +1335,7 @@ impl GetFullBatchData for MeshPipeline {
}

fn get_binned_batch_data(
(mesh_instances, lightmaps, _, _): &SystemParamItem<Self::Param>,
(mesh_instances, lightmaps, _, mesh_allocator): &SystemParamItem<Self::Param>,
entity: Entity,
) -> Option<Self::BufferData> {
let RenderMeshInstances::CpuBuilding(ref mesh_instances) = **mesh_instances else {
Expand All @@ -1287,10 +1345,16 @@ impl GetFullBatchData for MeshPipeline {
return None;
};
let mesh_instance = mesh_instances.get(&entity)?;
let first_vertex_index =
match mesh_allocator.mesh_vertex_slice(&mesh_instance.mesh_asset_id) {
Some(mesh_vertex_slice) => mesh_vertex_slice.range.start,
None => 0,
};
let maybe_lightmap = lightmaps.render_lightmaps.get(&entity);

Some(MeshUniform::new(
&mesh_instance.transforms,
first_vertex_index,
maybe_lightmap.map(|lightmap| lightmap.uv_rect),
))
}
Expand Down Expand Up @@ -2354,7 +2418,7 @@ impl<P: PhaseItem> RenderCommand<P> for DrawMesh {
}
RenderMeshBufferInfo::NonIndexed => match indirect_parameters {
None => {
pass.draw(0..gpu_mesh.vertex_count, batch_range.clone());
pass.draw(vertex_buffer_slice.range, batch_range.clone());
}
Some((indirect_parameters_offset, indirect_parameters_buffer)) => {
pass.draw_indirect(indirect_parameters_buffer, indirect_parameters_offset);
Expand Down
10 changes: 7 additions & 3 deletions crates/bevy_pbr/src/render/mesh.wgsl
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
#import bevy_pbr::{
mesh_bindings::mesh,
mesh_functions,
skinning,
morph::morph,
Expand All @@ -9,18 +10,21 @@
#ifdef MORPH_TARGETS
fn morph_vertex(vertex_in: Vertex) -> Vertex {
var vertex = vertex_in;
let first_vertex = mesh[vertex.instance_index].first_vertex_index;
let vertex_index = vertex.index - first_vertex;

let weight_count = bevy_pbr::morph::layer_count();
for (var i: u32 = 0u; i < weight_count; i ++) {
let weight = bevy_pbr::morph::weight_at(i);
if weight == 0.0 {
continue;
}
vertex.position += weight * morph(vertex.index, bevy_pbr::morph::position_offset, i);
vertex.position += weight * morph(vertex_index, bevy_pbr::morph::position_offset, i);
#ifdef VERTEX_NORMALS
vertex.normal += weight * morph(vertex.index, bevy_pbr::morph::normal_offset, i);
vertex.normal += weight * morph(vertex_index, bevy_pbr::morph::normal_offset, i);
#endif
#ifdef VERTEX_TANGENTS
vertex.tangent += vec4(weight * morph(vertex.index, bevy_pbr::morph::tangent_offset, i), 0.0);
vertex.tangent += vec4(weight * morph(vertex_index, bevy_pbr::morph::tangent_offset, i), 0.0);
#endif
}
return vertex;
Expand Down
5 changes: 5 additions & 0 deletions crates/bevy_pbr/src/render/mesh_preprocess.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,10 @@ struct MeshInput {
// The index of this mesh's `MeshInput` in the `previous_input` array, if
// applicable. If not present, this is `u32::MAX`.
previous_input_index: u32,
first_vertex_index: u32,
pad_a: u32,
pad_b: u32,
pad_c: u32,
}

// Information about each mesh instance needed to cull it on GPU.
Expand Down Expand Up @@ -186,4 +190,5 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3<u32>) {
output[mesh_output_index].local_from_world_transpose_b = local_from_world_transpose_b;
output[mesh_output_index].flags = current_input[input_index].flags;
output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect;
output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index;
}
5 changes: 5 additions & 0 deletions crates/bevy_pbr/src/render/mesh_types.wgsl
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,11 @@ struct Mesh {
// 'flags' is a bit field indicating various options. u32 is 32 bits so we have up to 32 options.
flags: u32,
lightmap_uv_rect: vec2<u32>,
// The index of the mesh's first vertex in the vertex buffer.
first_vertex_index: u32,
pad_a: u32,
pad_b: u32,
pad_c: u32,
};

#ifdef SKINNED
Expand Down
8 changes: 6 additions & 2 deletions crates/bevy_pbr/src/volumetric_fog/render.rs
Original file line number Diff line number Diff line change
Expand Up @@ -472,10 +472,14 @@ impl ViewNode for VolumetricFogNode {

render_pass
.set_index_buffer(*index_buffer_slice.buffer.slice(..), *index_format);
render_pass.draw_indexed(0..*count, 0, 0..1);
render_pass.draw_indexed(
index_buffer_slice.range.start..(index_buffer_slice.range.start + count),
vertex_buffer_slice.range.start as i32,
0..1,
);
}
RenderMeshBufferInfo::NonIndexed => {
render_pass.draw(0..render_mesh.vertex_count, 0..1);
render_pass.draw(vertex_buffer_slice.range, 0..1);
}
}
}
Expand Down

0 comments on commit d235d41

Please sign in to comment.