From 644d9ef1597e29c02ef6fcb06e237b7d733d969d Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Thu, 6 Feb 2025 16:42:02 -0800 Subject: [PATCH 1/4] Fix motion vector computation after #17688. PR #17688 broke motion vector computation, and therefore motion blur, because it enabled retention of `MeshInputUniform`s, and `MeshInputUniform`s contain the indices of the previous frame's transform and the previous frame's skinned mesh joint matrices. On frame N, if a `MeshInputUniform` is retained on GPU from the previous frame, the `previous_input_index` and `previous_skin_index` would refer to the indices for frame N - 2, not the index for frame N - 1. This patch fixes the problems. It solves these issues in two different ways, one for transforms and one for skins: 1. To fix transforms, this patch supplies the *frame index* to the shader as part of the view uniforms, and specifies which frame index each mesh's previous transform refers to. So, in the situation described above, the frame index would be N, the previous frame index would be N - 1, and the `previous_input_frame_number` would be N - 2. The shader can now detect this situation and infer that the mesh has been retained, and can therefore conclude that the mesh's transform hasn't changed. 2. To fix skins, this patch replaces the explicit `previous_skin_index` with an invariant that the index of the joints for the current frame and the index of the joints for the previous frame are the same. This means that the `MeshInputUniform` never has to be updated even if the skin is animated. The downside is that we have to copy joint matrices from the previous frame's buffer to the current frame's buffer in `extract_skins`. The rationale behind (2) is that we currently have no mechanism to detect when joints that affect a skin have been updated, short of comparing all the transforms and setting a flag for `extract_meshes_for_gpu_building` to consume, which would regress performance as we want `extract_skins` and `extract_meshes_for_gpu_building` to be able to run in parallel. To test this change, use `cargo run --example motion_blur`. --- crates/bevy_pbr/src/render/gpu_preprocess.rs | 23 +-- crates/bevy_pbr/src/render/mesh.rs | 47 +++--- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 9 +- crates/bevy_pbr/src/render/mesh_types.wgsl | 2 +- crates/bevy_pbr/src/render/skin.rs | 140 +++++++++++------- crates/bevy_pbr/src/render/skinning.wgsl | 2 +- .../mesh_preprocess_types.wgsl | 2 +- .../src/render_resource/buffer_vec.rs | 5 + crates/bevy_render/src/view/mod.rs | 17 ++- crates/bevy_render/src/view/view.wgsl | 1 + 10 files changed, 147 insertions(+), 101 deletions(-) diff --git a/crates/bevy_pbr/src/render/gpu_preprocess.rs b/crates/bevy_pbr/src/render/gpu_preprocess.rs index 71ccfa09582b7..e8205114b7fc7 100644 --- a/crates/bevy_pbr/src/render/gpu_preprocess.rs +++ b/crates/bevy_pbr/src/render/gpu_preprocess.rs @@ -626,12 +626,10 @@ impl Node for EarlyGpuPreprocessNode { continue; }; - // If we're drawing indirectly, make sure the mesh preprocessing - // shader has access to the view info it needs to do culling. - let mut dynamic_offsets: SmallVec<[u32; 1]> = smallvec![]; - if !no_indirect_drawing { - dynamic_offsets.push(view_uniform_offset.offset); - } + // Make sure the mesh preprocessing shader has access to the + // view info it needs to do culling and motion vector + // computation. + let dynamic_offsets = [view_uniform_offset.offset]; // Are we drawing directly or indirectly? match *phase_bind_groups { @@ -1317,6 +1315,11 @@ fn preprocess_direct_bind_group_layout_entries() -> DynamicBindGroupLayoutEntrie DynamicBindGroupLayoutEntries::new_with_indices( ShaderStages::COMPUTE, ( + // `view` + ( + 0, + uniform_buffer::(/* has_dynamic_offset= */ true), + ), // `current_input` (3, storage_buffer_read_only::(false)), // `previous_input` @@ -1361,11 +1364,6 @@ fn gpu_culling_bind_group_layout_entries() -> DynamicBindGroupLayoutEntries { 8, storage_buffer_read_only::(/* has_dynamic_offset= */ false), ), - // `view` - ( - 0, - uniform_buffer::(/* has_dynamic_offset= */ true), - ), )) } @@ -1802,11 +1800,14 @@ impl<'a> PreprocessBindGroupBuilder<'a> { ) .ok(); + let view_uniforms_binding = self.view_uniforms.uniforms.binding()?; + Some(PhasePreprocessBindGroups::Direct( self.render_device.create_bind_group( "preprocess_direct_bind_group", &self.pipelines.direct_preprocess.bind_group_layout, &BindGroupEntries::with_indices(( + (0, view_uniforms_binding), (3, self.current_input_buffer.as_entire_binding()), (4, self.previous_input_buffer.as_entire_binding()), ( diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index fc24b5e116cc5..742d39a0e4811 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -478,13 +478,13 @@ pub struct MeshUniform { pub first_vertex_index: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, - /// The previous skin index, or `u32::MAX` if there's no previous skin. - pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. /// High 16 bits: index of the lightmap in the binding array. pub material_and_lightmap_bind_group_slot: u32, + /// Padding. + pub pad: u32, } /// Information that has to be transferred from CPU to GPU in order to produce @@ -515,6 +515,11 @@ pub struct MeshInputUniform { /// /// This is used for TAA. If not present, this will be `u32::MAX`. pub previous_input_index: u32, + /// The frame number of the previous frame. + /// + /// The shader compared this field to the current frame number in order to + /// determine whether [`Self::previous_input_index`] is valid. + pub previous_input_frame_number: u32, /// The index of this mesh's first vertex in the vertex buffer. /// /// Multiple meshes can be packed into a single vertex buffer (see @@ -534,8 +539,6 @@ pub struct MeshInputUniform { pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, - /// The previous skin index, or `u32::MAX` if there's no previous skin. - pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. @@ -577,7 +580,6 @@ impl MeshUniform { material_bind_group_slot: MaterialBindGroupSlot, maybe_lightmap: Option<(LightmapSlotIndex, Rect)>, current_skin_index: Option, - previous_skin_index: Option, ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = mesh_transforms.world_from_local.inverse_transpose_3x3(); @@ -595,9 +597,9 @@ impl MeshUniform { flags: mesh_transforms.flags, first_vertex_index, current_skin_index: current_skin_index.unwrap_or(u32::MAX), - previous_skin_index: previous_skin_index.unwrap_or(u32::MAX), material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot) | ((lightmap_bind_group_slot as u32) << 16), + pad: 0, } } } @@ -1115,11 +1117,7 @@ impl RenderMeshInstanceGpuBuilder { None => (false, 0, 0), }; - let current_skin_index = match skin_indices.current.get(&entity) { - Some(skin_indices) => skin_indices.index(), - None => u32::MAX, - }; - let previous_skin_index = match skin_indices.prev.get(&entity) { + let current_skin_index = match skin_indices.get(&entity) { Some(skin_indices) => skin_indices.index(), None => u32::MAX, }; @@ -1148,6 +1146,7 @@ impl RenderMeshInstanceGpuBuilder { lightmap_uv_rect: self.lightmap_uv_rect, flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, + previous_input_frame_number: u32::MAX, first_vertex_index, first_index_index, index_count: if mesh_is_indexed { @@ -1156,7 +1155,6 @@ impl RenderMeshInstanceGpuBuilder { vertex_count }, current_skin_index, - previous_skin_index, material_and_lightmap_bind_group_slot: u32::from( self.shared.material_bindings_index.slot, ) | ((lightmap_slot as u32) << 16), @@ -1557,7 +1555,7 @@ fn set_mesh_motion_vector_flags( skin_indices: Res, morph_indices: Res, ) { - for &entity in skin_indices.prev.keys() { + for &entity in skin_indices.keys() { render_mesh_instances .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN); } @@ -1827,9 +1825,7 @@ impl GetBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); - let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); - + let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); let material_bind_group_index = mesh_instance.material_bindings_index; Some(( @@ -1839,7 +1835,6 @@ impl GetBatchData for MeshPipeline { material_bind_group_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, - previous_skin_index, ), mesh_instance.should_batch().then_some(( material_bind_group_index.group, @@ -1897,8 +1892,7 @@ impl GetFullBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); - let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); Some(MeshUniform::new( &mesh_instance.transforms, @@ -1906,7 +1900,6 @@ impl GetFullBatchData for MeshPipeline { mesh_instance.material_bindings_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, - previous_skin_index, )) } @@ -2793,8 +2786,7 @@ impl RenderCommand

for SetMeshBindGroup { return RenderCommandResult::Success; }; - let current_skin_index = skin_indices.current.get(entity); - let prev_skin_index = skin_indices.prev.get(entity); + let current_skin_index = skin_indices.get(entity); let current_morph_index = morph_indices.current.get(entity); let prev_morph_index = morph_indices.prev.get(entity); @@ -2841,14 +2833,11 @@ impl RenderCommand

for SetMeshBindGroup { if has_motion_vector_prepass { // Attach the previous skin index for motion vector computation. If // there isn't one, just use zero as the shader will ignore it. - if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) { - match prev_skin_index { - Some(prev_skin_index) => { - dynamic_offsets[offset_count] = prev_skin_index.byte_offset; - } - None => dynamic_offsets[offset_count] = 0, + if skin::skins_use_uniform_buffers(&render_device) { + if let Some(current_skin_index) = current_skin_index { + dynamic_offsets[offset_count] = current_skin_index.byte_offset; + offset_count += 1; } - offset_count += 1; } // Attach the previous morph index for motion vector computation. If diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 315dd13d3fdd6..45544ad61becf 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -193,13 +193,15 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { #endif // Look up the previous model matrix. + let previous_input_frame_number = current_input[input_index].previous_input_frame_number; let previous_input_index = current_input[input_index].previous_input_index; + let previous_input_is_valid = previous_input_frame_number + 1 == view.frame_number; var previous_world_from_local_affine_transpose: mat3x4; - if (previous_input_index == 0xffffffff) { - previous_world_from_local_affine_transpose = world_from_local_affine_transpose; - } else { + if (previous_input_is_valid && previous_input_index != 0xffffffffu) { previous_world_from_local_affine_transpose = previous_input[previous_input_index].world_from_local; + } else { + previous_world_from_local_affine_transpose = world_from_local_affine_transpose; } let previous_world_from_local = maths::affine3_to_square(previous_world_from_local_affine_transpose); @@ -342,7 +344,6 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; - output[mesh_output_index].previous_skin_index = current_input[input_index].previous_skin_index; output[mesh_output_index].material_and_lightmap_bind_group_slot = current_input[input_index].material_and_lightmap_bind_group_slot; } diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index f0258770c6da0..580c220d7af35 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -18,10 +18,10 @@ struct Mesh { // The index of the mesh's first vertex in the vertex buffer. first_vertex_index: u32, current_skin_index: u32, - previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, + pad: u32, }; #ifdef SKINNED diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index c248821ccafd3..40fc3c6bd0fda 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -2,9 +2,10 @@ use core::mem::{self, size_of}; use std::sync::OnceLock; use bevy_asset::Assets; +use bevy_derive::{Deref, DerefMut}; use bevy_ecs::prelude::*; use bevy_math::Mat4; -use bevy_render::sync_world::MainEntityHashMap; +use bevy_render::sync_world::{MainEntity, MainEntityHashMap}; use bevy_render::{ batching::NoAutomaticBatching, mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, @@ -25,7 +26,7 @@ use bevy_transform::prelude::GlobalTransform; pub const MAX_JOINTS: usize = 256; /// The location of the first joint matrix in the skin uniform buffer. -#[derive(Component)] +#[derive(Component, Clone, Copy)] pub struct SkinIndex { /// The byte offset of the first joint matrix. pub byte_offset: u32, @@ -52,16 +53,8 @@ impl SkinIndex { /// /// We store both the current frame's joint matrices and the previous frame's /// joint matrices for the purposes of motion vector calculation. -#[derive(Default, Resource)] -pub struct SkinIndices { - /// Maps each skinned mesh to the applicable offset within - /// [`SkinUniforms::current_buffer`]. - pub current: MainEntityHashMap, - - /// Maps each skinned mesh to the applicable offset within - /// [`SkinUniforms::prev_buffer`]. - pub prev: MainEntityHashMap, -} +#[derive(Default, Resource, Deref, DerefMut)] +pub struct SkinIndices(pub MainEntityHashMap); /// The GPU buffers containing joint matrices for all skinned meshes. /// @@ -109,18 +102,21 @@ pub fn prepare_skins( render_queue: Res, mut uniform: ResMut, ) { - if uniform.current_buffer.is_empty() { - return; + if !uniform.current_buffer.is_empty() { + let len = uniform.current_buffer.len(); + uniform.current_buffer.reserve(len, &render_device); + uniform + .current_buffer + .write_buffer(&render_device, &render_queue); } - let len = uniform.current_buffer.len(); - uniform.current_buffer.reserve(len, &render_device); - uniform - .current_buffer - .write_buffer(&render_device, &render_queue); - - // We don't need to write `uniform.prev_buffer` because we already wrote it - // last frame, and the data should still be on the GPU. + if !uniform.prev_buffer.is_empty() { + let len = uniform.prev_buffer.len(); + uniform.prev_buffer.reserve(len, &render_device); + uniform + .prev_buffer + .write_buffer(&render_device, &render_queue); + } } // Notes on implementation: @@ -150,24 +146,35 @@ pub fn prepare_skins( // which normally only support fixed size arrays. You just have to make sure // in the shader that you only read the values that are valid for that binding. pub fn extract_skins( - skin_indices: ResMut, - uniform: ResMut, + mut skin_indices: ResMut, + mut skin_uniforms: ResMut, query: Extract>, inverse_bindposes: Extract>>, joints: Extract>, render_device: Res, ) { + // We're going to make a new buffer, so figure out which we need. let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device); + let buffer_usages = if skins_use_uniform_buffers { + BufferUsages::UNIFORM + } else { + BufferUsages::STORAGE + }; - // Borrow check workaround. - let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner()); + // Grab the previous frame's buffer. We're going to copy matrices from it to + // the `prev_buffer`. + let old_skin_indices = mem::take(&mut **skin_indices); + let old_buffer = mem::replace( + &mut skin_uniforms.current_buffer, + RawBufferVec::new(buffer_usages), + ); - // Swap buffers. We need to keep the previous frame's buffer around for the - // purposes of motion vector computation. - mem::swap(&mut skin_indices.current, &mut skin_indices.prev); - mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); - skin_indices.current.clear(); - uniform.current_buffer.clear(); + let skin_uniforms = skin_uniforms.into_inner(); + let current_buffer = &mut skin_uniforms.current_buffer; + let prev_buffer = &mut skin_uniforms.prev_buffer; + + // Clear out the previous buffer so we can push onto it. + prev_buffer.clear(); let mut last_start = 0; @@ -176,44 +183,71 @@ pub fn extract_skins( if !view_visibility.get() { continue; } - let buffer = &mut uniform.current_buffer; + let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else { continue; }; - let start = buffer.len(); - - let target = start + skin.joints.len().min(MAX_JOINTS); - buffer.extend( - joints - .iter_many(&skin.joints) - .zip(inverse_bindposes.iter()) - .take(MAX_JOINTS) - .map(|(joint, bindpose)| joint.affine() * *bindpose), - ); + + let main_entity = MainEntity::from(entity); + + // Determine the boundaries of this skin in the joints buffer. + let start = current_buffer.len(); + let joint_count = skin.joints.len().min(MAX_JOINTS); + let end = start + joint_count; + + let first_old_skin_index = old_skin_indices.get(&main_entity).copied(); + + // Reserve space for the joints so we don't have to allocate. + current_buffer.reserve_internal(end); + prev_buffer.reserve_internal(end); + + for (joint_index, joint) in joints.iter_many(&skin.joints).take(joint_count).enumerate() { + // Push the joint matrix for the current frame. + let joint_matrix = match inverse_bindposes.get(joint_index) { + Some(inverse_bindpose) => joint.affine() * *inverse_bindpose, + None => joint.compute_matrix(), + }; + current_buffer.push(joint_matrix); + + // Grab the joint matrix from the previous frame, if there is one, + // and copy it to the previous frame's joint matrix buffer. + let prev_joint_matrix = first_old_skin_index + .and_then(|first_old_skin_index| { + old_buffer.get(first_old_skin_index.index() + (joint_index as u32)) + }) + .copied() + .unwrap_or(joint_matrix); + prev_buffer.push(prev_joint_matrix); + } + // iter_many will skip any failed fetches. This will cause it to assign the wrong bones, // so just bail by truncating to the start. - if buffer.len() != target { - buffer.truncate(start); + if current_buffer.len() != end { + current_buffer.truncate(start); + prev_buffer.truncate(start); continue; } + last_start = last_start.max(start); + debug_assert_eq!(current_buffer.len(), prev_buffer.len()); + // Pad to 256 byte alignment if we're using a uniform buffer. // There's no need to do this if we're using storage buffers, though. if skins_use_uniform_buffers { - while buffer.len() % 4 != 0 { - buffer.push(Mat4::ZERO); + while current_buffer.len() % 4 != 0 { + current_buffer.push(Mat4::ZERO); + prev_buffer.push(Mat4::ZERO); } } - skin_indices - .current - .insert(entity.into(), SkinIndex::new(start)); + skin_indices.insert(entity.into(), SkinIndex::new(start)); } - // Pad out the buffer to ensure that there's enough space for bindings - while uniform.current_buffer.len() - last_start < MAX_JOINTS { - uniform.current_buffer.push(Mat4::ZERO); + // Pad out the buffers to ensure that there's enough space for bindings + while current_buffer.len() - last_start < MAX_JOINTS { + current_buffer.push(Mat4::ZERO); + prev_buffer.push(Mat4::ZERO); } } diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 92e977aeb1b92..96fba57e1b3f6 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -57,7 +57,7 @@ fn skin_prev_model( + weights.z * prev_joint_matrices.data[indexes.z] + weights.w * prev_joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].previous_skin_index; + let skin_index = mesh[instance_index].current_skin_index; return weights.x * prev_joint_matrices[skin_index + indexes.x] + weights.y * prev_joint_matrices[skin_index + indexes.y] + weights.z * prev_joint_matrices[skin_index + indexes.z] diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index 7f4dd71f610f0..b7631cf1a0076 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -11,11 +11,11 @@ struct MeshInput { // Various flags. flags: u32, previous_input_index: u32, + previous_input_frame_number: u32, first_vertex_index: u32, first_index_index: u32, index_count: u32, current_skin_index: u32, - previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, diff --git a/crates/bevy_render/src/render_resource/buffer_vec.rs b/crates/bevy_render/src/render_resource/buffer_vec.rs index 0feaa8487885b..32d06ab397578 100644 --- a/crates/bevy_render/src/render_resource/buffer_vec.rs +++ b/crates/bevy_render/src/render_resource/buffer_vec.rs @@ -103,6 +103,11 @@ impl RawBufferVec { self.values.append(&mut other.values); } + /// Returns the value at the given index. + pub fn get(&self, index: u32) -> Option<&T> { + self.values.get(index as usize) + } + /// Sets the value at the given index. /// /// The index must be less than [`RawBufferVec::len`]. diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index b0fab01d01b4e..0eb86bc6a5eea 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -120,7 +120,7 @@ impl Plugin for ViewPlugin { )); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { - render_app.add_systems( + render_app.init_resource::().add_systems( Render, ( // `TextureView`s need to be dropped before reconfiguring window surfaces. @@ -137,6 +137,7 @@ impl Plugin for ViewPlugin { .after(crate::render_asset::prepare_assets::) .ambiguous_with(crate::camera::sort_cameras), // doesn't use `sorted_camera_index_for_target` prepare_view_uniforms.in_set(RenderSet::PrepareResources), + advance_frame.in_set(RenderSet::Cleanup), ), ); } @@ -568,6 +569,7 @@ pub struct ViewUniform { pub frustum: [Vec4; 6], pub color_grading: ColorGradingUniform, pub mip_bias: f32, + pub frame_number: u32, } #[derive(Resource)] @@ -876,6 +878,12 @@ impl ViewDepthTexture { } } +/// A number that starts at 0 and increments by 1 every frame. +/// +/// This is useful for determining which frame cached data belongs to. +#[derive(Resource, Default, Deref, DerefMut)] +pub struct FrameNumber(pub u32); + pub fn prepare_view_uniforms( mut commands: Commands, render_device: Res, @@ -889,6 +897,7 @@ pub fn prepare_view_uniforms( Option<&TemporalJitter>, Option<&MipBias>, )>, + frame_number: Res, ) { let view_iter = views.iter(); let view_count = view_iter.len(); @@ -942,6 +951,7 @@ pub fn prepare_view_uniforms( frustum, color_grading: extracted_view.color_grading.clone().into(), mip_bias: mip_bias.unwrap_or(&MipBias(0.0)).0, + frame_number: **frame_number, }), }; @@ -1107,3 +1117,8 @@ pub fn prepare_view_targets( }); } } + +/// Updates the [`FrameNumber`] at frame end. +pub fn advance_frame(mut frame_number: ResMut) { + **frame_number += 1; +} diff --git a/crates/bevy_render/src/view/view.wgsl b/crates/bevy_render/src/view/view.wgsl index ed08599758a07..2c901c3280de3 100644 --- a/crates/bevy_render/src/view/view.wgsl +++ b/crates/bevy_render/src/view/view.wgsl @@ -60,4 +60,5 @@ struct View { frustum: array, 6>, color_grading: ColorGrading, mip_bias: f32, + frame_number: u32, }; From 636e58b62413b95b7fa5cef944e84704911be157 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 7 Feb 2025 15:22:42 -0800 Subject: [PATCH 2/4] Fix meshlets --- crates/bevy_pbr/src/meshlet/instance_manager.rs | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/crates/bevy_pbr/src/meshlet/instance_manager.rs b/crates/bevy_pbr/src/meshlet/instance_manager.rs index 26f6432a1fdf7..b7370e9928c44 100644 --- a/crates/bevy_pbr/src/meshlet/instance_manager.rs +++ b/crates/bevy_pbr/src/meshlet/instance_manager.rs @@ -118,14 +118,8 @@ impl InstanceManager { .cloned() .unwrap_or_default(); - let mesh_uniform = MeshUniform::new( - &transforms, - 0, - mesh_material_binding_id.slot, - None, - None, - None, - ); + let mesh_uniform = + MeshUniform::new(&transforms, 0, mesh_material_binding_id.slot, None, None); // Append instance data self.instances.push(( From fa60b0a98a4d4a080441a3b74c00c4a7c6706626 Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Fri, 7 Feb 2025 17:56:13 -0800 Subject: [PATCH 3/4] Remove `FrameNumber` in favor of `FrameCount` --- crates/bevy_pbr/src/render/mesh.rs | 4 ++-- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 4 ++-- .../mesh_preprocess_types.wgsl | 2 +- crates/bevy_render/src/view/mod.rs | 21 +++++-------------- crates/bevy_render/src/view/view.wgsl | 2 +- 5 files changed, 11 insertions(+), 22 deletions(-) diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index 742d39a0e4811..e0f3d262513c3 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -519,7 +519,7 @@ pub struct MeshInputUniform { /// /// The shader compared this field to the current frame number in order to /// determine whether [`Self::previous_input_index`] is valid. - pub previous_input_frame_number: u32, + pub previous_input_frame_count: u32, /// The index of this mesh's first vertex in the vertex buffer. /// /// Multiple meshes can be packed into a single vertex buffer (see @@ -1146,7 +1146,7 @@ impl RenderMeshInstanceGpuBuilder { lightmap_uv_rect: self.lightmap_uv_rect, flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, - previous_input_frame_number: u32::MAX, + previous_input_frame_count: u32::MAX, first_vertex_index, first_index_index, index_count: if mesh_is_indexed { diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index 45544ad61becf..1c40c2c37e793 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -193,9 +193,9 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { #endif // Look up the previous model matrix. - let previous_input_frame_number = current_input[input_index].previous_input_frame_number; + let previous_input_frame_count = current_input[input_index].previous_input_frame_count; let previous_input_index = current_input[input_index].previous_input_index; - let previous_input_is_valid = previous_input_frame_number + 1 == view.frame_number; + let previous_input_is_valid = previous_input_frame_count + 1 == view.frame_count; var previous_world_from_local_affine_transpose: mat3x4; if (previous_input_is_valid && previous_input_index != 0xffffffffu) { previous_world_from_local_affine_transpose = diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index b7631cf1a0076..e17d45897b32a 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -11,7 +11,7 @@ struct MeshInput { // Various flags. flags: u32, previous_input_index: u32, - previous_input_frame_number: u32, + previous_input_frame_count: u32, first_vertex_index: u32, first_index_index: u32, index_count: u32, diff --git a/crates/bevy_render/src/view/mod.rs b/crates/bevy_render/src/view/mod.rs index 0eb86bc6a5eea..ba0f8cca62ced 100644 --- a/crates/bevy_render/src/view/mod.rs +++ b/crates/bevy_render/src/view/mod.rs @@ -2,6 +2,7 @@ pub mod visibility; pub mod window; use bevy_asset::{load_internal_asset, weak_handle, Handle}; +use bevy_diagnostic::FrameCount; pub use visibility::*; pub use window::*; @@ -120,7 +121,7 @@ impl Plugin for ViewPlugin { )); if let Some(render_app) = app.get_sub_app_mut(RenderApp) { - render_app.init_resource::().add_systems( + render_app.add_systems( Render, ( // `TextureView`s need to be dropped before reconfiguring window surfaces. @@ -137,7 +138,6 @@ impl Plugin for ViewPlugin { .after(crate::render_asset::prepare_assets::) .ambiguous_with(crate::camera::sort_cameras), // doesn't use `sorted_camera_index_for_target` prepare_view_uniforms.in_set(RenderSet::PrepareResources), - advance_frame.in_set(RenderSet::Cleanup), ), ); } @@ -569,7 +569,7 @@ pub struct ViewUniform { pub frustum: [Vec4; 6], pub color_grading: ColorGradingUniform, pub mip_bias: f32, - pub frame_number: u32, + pub frame_count: u32, } #[derive(Resource)] @@ -878,12 +878,6 @@ impl ViewDepthTexture { } } -/// A number that starts at 0 and increments by 1 every frame. -/// -/// This is useful for determining which frame cached data belongs to. -#[derive(Resource, Default, Deref, DerefMut)] -pub struct FrameNumber(pub u32); - pub fn prepare_view_uniforms( mut commands: Commands, render_device: Res, @@ -897,7 +891,7 @@ pub fn prepare_view_uniforms( Option<&TemporalJitter>, Option<&MipBias>, )>, - frame_number: Res, + frame_count: Res, ) { let view_iter = views.iter(); let view_count = view_iter.len(); @@ -951,7 +945,7 @@ pub fn prepare_view_uniforms( frustum, color_grading: extracted_view.color_grading.clone().into(), mip_bias: mip_bias.unwrap_or(&MipBias(0.0)).0, - frame_number: **frame_number, + frame_count: frame_count.0, }), }; @@ -1117,8 +1111,3 @@ pub fn prepare_view_targets( }); } } - -/// Updates the [`FrameNumber`] at frame end. -pub fn advance_frame(mut frame_number: ResMut) { - **frame_number += 1; -} diff --git a/crates/bevy_render/src/view/view.wgsl b/crates/bevy_render/src/view/view.wgsl index 2c901c3280de3..317de2eb88073 100644 --- a/crates/bevy_render/src/view/view.wgsl +++ b/crates/bevy_render/src/view/view.wgsl @@ -60,5 +60,5 @@ struct View { frustum: array, 6>, color_grading: ColorGrading, mip_bias: f32, - frame_number: u32, + frame_count: u32, }; From ea2029d9ac5b22c2819ae9b90e601645d662402d Mon Sep 17 00:00:00 2001 From: Patrick Walton Date: Tue, 11 Feb 2025 00:15:44 -0800 Subject: [PATCH 4/4] Try to fix motion blur properly. --- crates/bevy_pbr/Cargo.toml | 1 + crates/bevy_pbr/src/render/mesh.rs | 66 +++++--- .../bevy_pbr/src/render/mesh_preprocess.wgsl | 15 +- crates/bevy_pbr/src/render/mesh_types.wgsl | 1 + crates/bevy_pbr/src/render/skin.rs | 154 ++++++++---------- crates/bevy_pbr/src/render/skinning.wgsl | 16 +- .../mesh_preprocess_types.wgsl | 4 +- 7 files changed, 139 insertions(+), 118 deletions(-) diff --git a/crates/bevy_pbr/Cargo.toml b/crates/bevy_pbr/Cargo.toml index bf568885e7fef..5031d7c91444f 100644 --- a/crates/bevy_pbr/Cargo.toml +++ b/crates/bevy_pbr/Cargo.toml @@ -37,6 +37,7 @@ bevy_asset = { path = "../bevy_asset", version = "0.16.0-dev" } bevy_color = { path = "../bevy_color", version = "0.16.0-dev" } bevy_core_pipeline = { path = "../bevy_core_pipeline", version = "0.16.0-dev" } bevy_derive = { path = "../bevy_derive", version = "0.16.0-dev" } +bevy_diagnostic = { path = "../bevy_diagnostic", version = "0.16.0-dev" } bevy_ecs = { path = "../bevy_ecs", version = "0.16.0-dev" } bevy_image = { path = "../bevy_image", version = "0.16.0-dev" } bevy_math = { path = "../bevy_math", version = "0.16.0-dev" } diff --git a/crates/bevy_pbr/src/render/mesh.rs b/crates/bevy_pbr/src/render/mesh.rs index e50f4a5918946..34d0debbb7c6b 100644 --- a/crates/bevy_pbr/src/render/mesh.rs +++ b/crates/bevy_pbr/src/render/mesh.rs @@ -1,4 +1,7 @@ -use crate::material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}; +use crate::{ + material_bind_groups::{MaterialBindGroupIndex, MaterialBindGroupSlot}, + skin::mark_meshes_as_changed_if_their_skins_changed, +}; use allocator::MeshAllocator; use bevy_asset::{load_internal_asset, AssetId, UntypedAssetId}; use bevy_core_pipeline::{ @@ -8,6 +11,7 @@ use bevy_core_pipeline::{ prepass::MotionVectorPrepass, }; use bevy_derive::{Deref, DerefMut}; +use bevy_diagnostic::FrameCount; use bevy_ecs::{ prelude::*, query::ROQueryItem, @@ -163,7 +167,13 @@ impl Plugin for MeshRenderPlugin { app.add_systems( PostUpdate, - (no_automatic_skin_batching, no_automatic_morph_batching), + ( + no_automatic_skin_batching, + no_automatic_morph_batching, + mark_meshes_as_changed_if_their_skins_changed + .ambiguous_with_all() + .after(mark_3d_meshes_as_changed_if_their_assets_changed), + ), ) .add_plugins(( BinnedRenderPhasePlugin::::default(), @@ -478,6 +488,8 @@ pub struct MeshUniform { pub first_vertex_index: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, + /// The previous skin index, or `u32::MAX` if there's no previous skin. + pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. @@ -515,11 +527,6 @@ pub struct MeshInputUniform { /// /// This is used for TAA. If not present, this will be `u32::MAX`. pub previous_input_index: u32, - /// The frame number of the previous frame. - /// - /// The shader compared this field to the current frame number in order to - /// determine whether [`Self::previous_input_index`] is valid. - pub previous_input_frame_count: u32, /// The index of this mesh's first vertex in the vertex buffer. /// /// Multiple meshes can be packed into a single vertex buffer (see @@ -539,15 +546,15 @@ pub struct MeshInputUniform { pub index_count: u32, /// The current skin index, or `u32::MAX` if there's no skin. pub current_skin_index: u32, + pub previous_skin_index: u32, /// The material and lightmap indices, packed into 32 bits. /// /// Low 16 bits: index of the material inside the bind group data. /// High 16 bits: index of the lightmap in the binding array. pub material_and_lightmap_bind_group_slot: u32, + pub timestamp: u32, /// User supplied tag to identify this mesh instance. pub tag: u32, - /// Padding. - pub pad: u32, } /// Information about each mesh instance needed to cull it on GPU. @@ -580,6 +587,7 @@ impl MeshUniform { material_bind_group_slot: MaterialBindGroupSlot, maybe_lightmap: Option<(LightmapSlotIndex, Rect)>, current_skin_index: Option, + previous_skin_index: Option, tag: Option, ) -> Self { let (local_from_world_transpose_a, local_from_world_transpose_b) = @@ -598,6 +606,7 @@ impl MeshUniform { flags: mesh_transforms.flags, first_vertex_index, current_skin_index: current_skin_index.unwrap_or(u32::MAX), + previous_skin_index: previous_skin_index.unwrap_or(u32::MAX), material_and_lightmap_bind_group_slot: u32::from(material_bind_group_slot) | ((lightmap_bind_group_slot as u32) << 16), tag: tag.unwrap_or(0), @@ -1103,6 +1112,7 @@ impl RenderMeshInstanceGpuBuilder { render_material_bindings: &RenderMaterialBindings, render_lightmaps: &RenderLightmaps, skin_indices: &SkinIndices, + timestamp: FrameCount, ) -> u32 { let (first_vertex_index, vertex_count) = match mesh_allocator.mesh_vertex_slice(&self.shared.mesh_asset_id) { @@ -1121,8 +1131,11 @@ impl RenderMeshInstanceGpuBuilder { ), None => (false, 0, 0), }; - - let current_skin_index = match skin_indices.get(&entity) { + let current_skin_index = match skin_indices.current.get(&entity) { + Some(skin_indices) => skin_indices.index(), + None => u32::MAX, + }; + let previous_skin_index = match skin_indices.prev.get(&entity) { Some(skin_indices) => skin_indices.index(), None => u32::MAX, }; @@ -1151,7 +1164,7 @@ impl RenderMeshInstanceGpuBuilder { lightmap_uv_rect: self.lightmap_uv_rect, flags: self.mesh_flags.bits(), previous_input_index: u32::MAX, - previous_input_frame_count: u32::MAX, + timestamp: timestamp.0, first_vertex_index, first_index_index, index_count: if mesh_is_indexed { @@ -1160,11 +1173,11 @@ impl RenderMeshInstanceGpuBuilder { vertex_count }, current_skin_index, + previous_skin_index, material_and_lightmap_bind_group_slot: u32::from( self.shared.material_bindings_index.slot, ) | ((lightmap_slot as u32) << 16), tag: self.shared.tag, - pad: 0, }; // Did the last frame contain this entity as well? @@ -1566,7 +1579,7 @@ fn set_mesh_motion_vector_flags( skin_indices: Res, morph_indices: Res, ) { - for &entity in skin_indices.keys() { + for &entity in skin_indices.prev.keys() { render_mesh_instances .insert_mesh_instance_flags(entity, RenderMeshInstanceFlags::HAS_PREVIOUS_SKIN); } @@ -1590,6 +1603,7 @@ pub fn collect_meshes_for_gpu_building( render_material_bindings: Res, render_lightmaps: Res, skin_indices: Res, + frame_count: Res, ) { let RenderMeshInstances::GpuBuilding(ref mut render_mesh_instances) = render_mesh_instances.into_inner() @@ -1629,6 +1643,7 @@ pub fn collect_meshes_for_gpu_building( &render_material_bindings, &render_lightmaps, &skin_indices, + *frame_count, ); } @@ -1656,6 +1671,7 @@ pub fn collect_meshes_for_gpu_building( &render_material_bindings, &render_lightmaps, &skin_indices, + *frame_count, ); mesh_culling_builder .update(&mut mesh_culling_data_buffer, instance_data_index as usize); @@ -1836,7 +1852,8 @@ impl GetBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); + let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); let material_bind_group_index = mesh_instance.material_bindings_index; Some(( @@ -1846,6 +1863,7 @@ impl GetBatchData for MeshPipeline { material_bind_group_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, + previous_skin_index, Some(mesh_instance.tag), ), mesh_instance.should_batch().then_some(( @@ -1904,7 +1922,8 @@ impl GetFullBatchData for MeshPipeline { }; let maybe_lightmap = lightmaps.render_lightmaps.get(&main_entity); - let current_skin_index = skin_indices.get(&main_entity).map(SkinIndex::index); + let current_skin_index = skin_indices.current.get(&main_entity).map(SkinIndex::index); + let previous_skin_index = skin_indices.prev.get(&main_entity).map(SkinIndex::index); Some(MeshUniform::new( &mesh_instance.transforms, @@ -1912,6 +1931,7 @@ impl GetFullBatchData for MeshPipeline { mesh_instance.material_bindings_index.slot, maybe_lightmap.map(|lightmap| (lightmap.slot_index, lightmap.uv_rect)), current_skin_index, + previous_skin_index, Some(mesh_instance.tag), )) } @@ -2799,7 +2819,8 @@ impl RenderCommand

for SetMeshBindGroup { return RenderCommandResult::Success; }; - let current_skin_index = skin_indices.get(entity); + let current_skin_index = skin_indices.current.get(entity); + let prev_skin_index = skin_indices.prev.get(entity); let current_morph_index = morph_indices.current.get(entity); let prev_morph_index = morph_indices.prev.get(entity); @@ -2846,11 +2867,14 @@ impl RenderCommand

for SetMeshBindGroup { if has_motion_vector_prepass { // Attach the previous skin index for motion vector computation. If // there isn't one, just use zero as the shader will ignore it. - if skin::skins_use_uniform_buffers(&render_device) { - if let Some(current_skin_index) = current_skin_index { - dynamic_offsets[offset_count] = current_skin_index.byte_offset; - offset_count += 1; + if current_skin_index.is_some() && skin::skins_use_uniform_buffers(&render_device) { + match prev_skin_index { + Some(prev_skin_index) => { + dynamic_offsets[offset_count] = prev_skin_index.byte_offset; + } + None => dynamic_offsets[offset_count] = 0, } + offset_count += 1; } // Attach the previous morph index for motion vector computation. If diff --git a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl index ccdad7b48e189..932bd58a1aaab 100644 --- a/crates/bevy_pbr/src/render/mesh_preprocess.wgsl +++ b/crates/bevy_pbr/src/render/mesh_preprocess.wgsl @@ -192,12 +192,14 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { } #endif + // Was the mesh transform updated this frame? + let timestamp = current_input[input_index].timestamp; + let mesh_changed_this_frame = timestamp == view.frame_count; + // Look up the previous model matrix. - let previous_input_frame_count = current_input[input_index].previous_input_frame_count; let previous_input_index = current_input[input_index].previous_input_index; - let previous_input_is_valid = previous_input_frame_count + 1 == view.frame_count; var previous_world_from_local_affine_transpose: mat3x4; - if (previous_input_is_valid && previous_input_index != 0xffffffffu) { + if (mesh_changed_this_frame && previous_input_index != 0xffffffffu) { previous_world_from_local_affine_transpose = previous_input[previous_input_index].world_from_local; } else { @@ -206,6 +208,12 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { let previous_world_from_local = maths::affine3_to_square(previous_world_from_local_affine_transpose); + let previous_skin_index = select( + 0xffffffffu, + current_input[input_index].previous_skin_index, + mesh_changed_this_frame + ); + // Occlusion cull if necessary. This is done by calculating the screen-space // axis-aligned bounding box (AABB) of the mesh and testing it against the // appropriate level of the depth pyramid (a.k.a. hierarchical Z-buffer). If @@ -344,6 +352,7 @@ fn main(@builtin(global_invocation_id) global_invocation_id: vec3) { output[mesh_output_index].lightmap_uv_rect = current_input[input_index].lightmap_uv_rect; output[mesh_output_index].first_vertex_index = current_input[input_index].first_vertex_index; output[mesh_output_index].current_skin_index = current_input[input_index].current_skin_index; + output[mesh_output_index].previous_skin_index = previous_skin_index; output[mesh_output_index].material_and_lightmap_bind_group_slot = current_input[input_index].material_and_lightmap_bind_group_slot; output[mesh_output_index].tag = current_input[input_index].tag; diff --git a/crates/bevy_pbr/src/render/mesh_types.wgsl b/crates/bevy_pbr/src/render/mesh_types.wgsl index c3fd684028974..ebac14915ea13 100644 --- a/crates/bevy_pbr/src/render/mesh_types.wgsl +++ b/crates/bevy_pbr/src/render/mesh_types.wgsl @@ -18,6 +18,7 @@ struct Mesh { // The index of the mesh's first vertex in the vertex buffer. first_vertex_index: u32, current_skin_index: u32, + previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, diff --git a/crates/bevy_pbr/src/render/skin.rs b/crates/bevy_pbr/src/render/skin.rs index 40fc3c6bd0fda..c3082caf1e490 100644 --- a/crates/bevy_pbr/src/render/skin.rs +++ b/crates/bevy_pbr/src/render/skin.rs @@ -2,10 +2,10 @@ use core::mem::{self, size_of}; use std::sync::OnceLock; use bevy_asset::Assets; -use bevy_derive::{Deref, DerefMut}; use bevy_ecs::prelude::*; use bevy_math::Mat4; -use bevy_render::sync_world::{MainEntity, MainEntityHashMap}; +use bevy_render::mesh::Mesh3d; +use bevy_render::sync_world::MainEntityHashMap; use bevy_render::{ batching::NoAutomaticBatching, mesh::skinning::{SkinnedMesh, SkinnedMeshInverseBindposes}, @@ -53,8 +53,16 @@ impl SkinIndex { /// /// We store both the current frame's joint matrices and the previous frame's /// joint matrices for the purposes of motion vector calculation. -#[derive(Default, Resource, Deref, DerefMut)] -pub struct SkinIndices(pub MainEntityHashMap); +#[derive(Default, Resource)] +pub struct SkinIndices { + /// Maps each skinned mesh to the applicable offset within + /// [`SkinUniforms::current_buffer`]. + pub current: MainEntityHashMap, + + /// Maps each skinned mesh to the applicable offset within + /// [`SkinUniforms::prev_buffer`]. + pub prev: MainEntityHashMap, +} /// The GPU buffers containing joint matrices for all skinned meshes. /// @@ -102,21 +110,18 @@ pub fn prepare_skins( render_queue: Res, mut uniform: ResMut, ) { - if !uniform.current_buffer.is_empty() { - let len = uniform.current_buffer.len(); - uniform.current_buffer.reserve(len, &render_device); - uniform - .current_buffer - .write_buffer(&render_device, &render_queue); + if uniform.current_buffer.is_empty() { + return; } - if !uniform.prev_buffer.is_empty() { - let len = uniform.prev_buffer.len(); - uniform.prev_buffer.reserve(len, &render_device); - uniform - .prev_buffer - .write_buffer(&render_device, &render_queue); - } + let len = uniform.current_buffer.len(); + uniform.current_buffer.reserve(len, &render_device); + uniform + .current_buffer + .write_buffer(&render_device, &render_queue); + + // We don't need to write `uniform.prev_buffer` because we already wrote it + // last frame, and the data should still be on the GPU. } // Notes on implementation: @@ -146,35 +151,24 @@ pub fn prepare_skins( // which normally only support fixed size arrays. You just have to make sure // in the shader that you only read the values that are valid for that binding. pub fn extract_skins( - mut skin_indices: ResMut, - mut skin_uniforms: ResMut, + skin_indices: ResMut, + uniform: ResMut, query: Extract>, inverse_bindposes: Extract>>, joints: Extract>, render_device: Res, ) { - // We're going to make a new buffer, so figure out which we need. let skins_use_uniform_buffers = skins_use_uniform_buffers(&render_device); - let buffer_usages = if skins_use_uniform_buffers { - BufferUsages::UNIFORM - } else { - BufferUsages::STORAGE - }; - - // Grab the previous frame's buffer. We're going to copy matrices from it to - // the `prev_buffer`. - let old_skin_indices = mem::take(&mut **skin_indices); - let old_buffer = mem::replace( - &mut skin_uniforms.current_buffer, - RawBufferVec::new(buffer_usages), - ); - let skin_uniforms = skin_uniforms.into_inner(); - let current_buffer = &mut skin_uniforms.current_buffer; - let prev_buffer = &mut skin_uniforms.prev_buffer; + // Borrow check workaround. + let (skin_indices, uniform) = (skin_indices.into_inner(), uniform.into_inner()); - // Clear out the previous buffer so we can push onto it. - prev_buffer.clear(); + // Swap buffers. We need to keep the previous frame's buffer around for the + // purposes of motion vector computation. + mem::swap(&mut skin_indices.current, &mut skin_indices.prev); + mem::swap(&mut uniform.current_buffer, &mut uniform.prev_buffer); + skin_indices.current.clear(); + uniform.current_buffer.clear(); let mut last_start = 0; @@ -183,71 +177,44 @@ pub fn extract_skins( if !view_visibility.get() { continue; } - + let buffer = &mut uniform.current_buffer; let Some(inverse_bindposes) = inverse_bindposes.get(&skin.inverse_bindposes) else { continue; }; - - let main_entity = MainEntity::from(entity); - - // Determine the boundaries of this skin in the joints buffer. - let start = current_buffer.len(); - let joint_count = skin.joints.len().min(MAX_JOINTS); - let end = start + joint_count; - - let first_old_skin_index = old_skin_indices.get(&main_entity).copied(); - - // Reserve space for the joints so we don't have to allocate. - current_buffer.reserve_internal(end); - prev_buffer.reserve_internal(end); - - for (joint_index, joint) in joints.iter_many(&skin.joints).take(joint_count).enumerate() { - // Push the joint matrix for the current frame. - let joint_matrix = match inverse_bindposes.get(joint_index) { - Some(inverse_bindpose) => joint.affine() * *inverse_bindpose, - None => joint.compute_matrix(), - }; - current_buffer.push(joint_matrix); - - // Grab the joint matrix from the previous frame, if there is one, - // and copy it to the previous frame's joint matrix buffer. - let prev_joint_matrix = first_old_skin_index - .and_then(|first_old_skin_index| { - old_buffer.get(first_old_skin_index.index() + (joint_index as u32)) - }) - .copied() - .unwrap_or(joint_matrix); - prev_buffer.push(prev_joint_matrix); - } - + let start = buffer.len(); + + let target = start + skin.joints.len().min(MAX_JOINTS); + buffer.extend( + joints + .iter_many(&skin.joints) + .zip(inverse_bindposes.iter()) + .take(MAX_JOINTS) + .map(|(joint, bindpose)| joint.affine() * *bindpose), + ); // iter_many will skip any failed fetches. This will cause it to assign the wrong bones, // so just bail by truncating to the start. - if current_buffer.len() != end { - current_buffer.truncate(start); - prev_buffer.truncate(start); + if buffer.len() != target { + buffer.truncate(start); continue; } - last_start = last_start.max(start); - debug_assert_eq!(current_buffer.len(), prev_buffer.len()); - // Pad to 256 byte alignment if we're using a uniform buffer. // There's no need to do this if we're using storage buffers, though. if skins_use_uniform_buffers { - while current_buffer.len() % 4 != 0 { - current_buffer.push(Mat4::ZERO); - prev_buffer.push(Mat4::ZERO); + while buffer.len() % 4 != 0 { + buffer.push(Mat4::ZERO); } } - skin_indices.insert(entity.into(), SkinIndex::new(start)); + skin_indices + .current + .insert(entity.into(), SkinIndex::new(start)); } - // Pad out the buffers to ensure that there's enough space for bindings - while current_buffer.len() - last_start < MAX_JOINTS { - current_buffer.push(Mat4::ZERO); - prev_buffer.push(Mat4::ZERO); + // Pad out the buffer to ensure that there's enough space for bindings + while uniform.current_buffer.len() - last_start < MAX_JOINTS { + uniform.current_buffer.push(Mat4::ZERO); } } @@ -266,3 +233,18 @@ pub fn no_automatic_skin_batching( commands.entity(entity).try_insert(NoAutomaticBatching); } } + +pub fn mark_meshes_as_changed_if_their_skins_changed( + mut skinned_meshes_query: Query<(&mut Mesh3d, &SkinnedMesh)>, + changed_joints_query: Query>, +) { + for (mut mesh, skinned_mesh) in &mut skinned_meshes_query { + if skinned_mesh + .joints + .iter() + .any(|&joint| changed_joints_query.contains(joint)) + { + mesh.set_changed(); + } + } +} diff --git a/crates/bevy_pbr/src/render/skinning.wgsl b/crates/bevy_pbr/src/render/skinning.wgsl index 96fba57e1b3f6..038b293eed238 100644 --- a/crates/bevy_pbr/src/render/skinning.wgsl +++ b/crates/bevy_pbr/src/render/skinning.wgsl @@ -34,7 +34,7 @@ fn skin_model( + weights.z * joint_matrices.data[indexes.z] + weights.w * joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].current_skin_index; + var skin_index = mesh[instance_index].current_skin_index; return weights.x * joint_matrices[skin_index + indexes.x] + weights.y * joint_matrices[skin_index + indexes.y] + weights.z * joint_matrices[skin_index + indexes.z] @@ -57,11 +57,15 @@ fn skin_prev_model( + weights.z * prev_joint_matrices.data[indexes.z] + weights.w * prev_joint_matrices.data[indexes.w]; #else // SKINS_USE_UNIFORM_BUFFERS - let skin_index = mesh[instance_index].current_skin_index; - return weights.x * prev_joint_matrices[skin_index + indexes.x] - + weights.y * prev_joint_matrices[skin_index + indexes.y] - + weights.z * prev_joint_matrices[skin_index + indexes.z] - + weights.w * prev_joint_matrices[skin_index + indexes.w]; + let prev_skin_index = mesh[instance_index].previous_skin_index; + if (prev_skin_index == 0xffffffffu) { + return skin_model(indexed, weights, instance_index); + } + + return weights.x * prev_joint_matrices[prev_skin_index + indexes.x] + + weights.y * prev_joint_matrices[prev_skin_index + indexes.y] + + weights.z * prev_joint_matrices[prev_skin_index + indexes.z] + + weights.w * prev_joint_matrices[prev_skin_index + indexes.w]; #endif // SKINS_USE_UNIFORM_BUFFERS } diff --git a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl index 957d5c037e7db..cbf9b2dc46338 100644 --- a/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl +++ b/crates/bevy_render/src/experimental/occlusion_culling/mesh_preprocess_types.wgsl @@ -11,17 +11,17 @@ struct MeshInput { // Various flags. flags: u32, previous_input_index: u32, - previous_input_frame_count: u32, first_vertex_index: u32, first_index_index: u32, index_count: u32, current_skin_index: u32, + previous_skin_index: u32, // Low 16 bits: index of the material inside the bind group data. // High 16 bits: index of the lightmap in the binding array. material_and_lightmap_bind_group_slot: u32, + timestamp: u32, // User supplied index to identify the mesh instance tag: u32, - pad: u32, } // The `wgpu` indirect parameters structure. This is a union of two structures.