Skip to content

Commit

Permalink
Build batches across phases in parallel.
Browse files Browse the repository at this point in the history
Currently, invocations of `batch_and_prepare_binned_render_phase` and
`batch_and_prepare_sorted_render_phase` can't run in parallel because
they write to scene-global GPU buffers. After PR bevyengine#17698,
`batch_and_prepare_binned_render_phase` started accounting for the
lion's share of the CPU time, causing us to be strongly CPU bound on
scenes like Caldera when occlusion culling was on (because of the
overhead of batching for the Z-prepass). Although I eventually plan to
optimize `batch_and_prepare_binned_render_phase`, we can obtain
significant wins now by parallelizing that system across phases.

This commit splits all GPU buffers that
`batch_and_prepare_binned_render_phase` and
`batch_and_prepare_sorted_render_phase` touch into separate buffers
for each phase so that the scheduler will run those phases in parallel.
At the end of batch preparation, we gather the render phases up into a
single resource with a new *collection* phase. Because we already run
mesh preprocessing separately for each phase in order to make occlusion
culling work, this is actually a cleaner separation. For example, mesh
output indices (the unique ID that identifies each mesh instance on GPU)
are now guaranteed to be sequential starting from 0, which will simplify
the forthcoming work to remove them in favor of the compute dispatch ID.

On Caldera, this brings the frame time down to approximately 9.1 ms with
occlusion culling on.
  • Loading branch information
pcwalton committed Feb 10, 2025
1 parent ea57841 commit 08de0fe
Show file tree
Hide file tree
Showing 13 changed files with 966 additions and 423 deletions.
1 change: 1 addition & 0 deletions crates/bevy_pbr/src/decal/forward.rs
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ impl Plugin for ForwardDecalPlugin {
app.add_plugins(MaterialPlugin::<ForwardDecalMaterial<StandardMaterial>> {
prepass_enabled: false,
shadows_enabled: false,
allow_copies_from_indirect_parameters: false,
..Default::default()
});
}
Expand Down
11 changes: 11 additions & 0 deletions crates/bevy_pbr/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -182,6 +182,12 @@ pub struct PbrPlugin {
/// This requires compute shader support and so will be forcibly disabled if
/// the platform doesn't support those.
pub use_gpu_instance_buffer_builder: bool,
/// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
/// that they can be read back to CPU.
///
/// This is a debugging feature that may reduce performance. It primarily
/// exists for the `occlusion_culling` example.
pub allow_copies_from_indirect_parameters: bool,
}

impl Default for PbrPlugin {
Expand All @@ -190,6 +196,7 @@ impl Default for PbrPlugin {
prepass_enabled: true,
add_default_deferred_lighting_plugin: true,
use_gpu_instance_buffer_builder: true,
allow_copies_from_indirect_parameters: false,
}
}
}
Expand Down Expand Up @@ -333,9 +340,13 @@ impl Plugin for PbrPlugin {
.add_plugins((
MeshRenderPlugin {
use_gpu_instance_buffer_builder: self.use_gpu_instance_buffer_builder,
allow_copies_from_indirect_parameters: self
.allow_copies_from_indirect_parameters,
},
MaterialPlugin::<StandardMaterial> {
prepass_enabled: self.prepass_enabled,
allow_copies_from_indirect_parameters: self
.allow_copies_from_indirect_parameters,
..Default::default()
},
ScreenSpaceAmbientOcclusionPlugin,
Expand Down
11 changes: 10 additions & 1 deletion crates/bevy_pbr/src/material.rs
Original file line number Diff line number Diff line change
Expand Up @@ -252,6 +252,12 @@ pub struct MaterialPlugin<M: Material> {
pub prepass_enabled: bool,
/// Controls if shadows are enabled for the Material.
pub shadows_enabled: bool,
/// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
/// that they can be read back to CPU.
///
/// This is a debugging feature that may reduce performance. It primarily
/// exists for the `occlusion_culling` example.
pub allow_copies_from_indirect_parameters: bool,
pub _marker: PhantomData<M>,
}

Expand All @@ -260,6 +266,7 @@ impl<M: Material> Default for MaterialPlugin<M> {
Self {
prepass_enabled: true,
shadows_enabled: true,
allow_copies_from_indirect_parameters: false,
_marker: Default::default(),
}
}
Expand Down Expand Up @@ -374,7 +381,9 @@ where
}

if self.prepass_enabled {
app.add_plugins(PrepassPlugin::<M>::default());
app.add_plugins(PrepassPlugin::<M>::new(
self.allow_copies_from_indirect_parameters,
));
}
}

Expand Down
27 changes: 21 additions & 6 deletions crates/bevy_pbr/src/prepass/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -146,11 +146,22 @@ where
/// Sets up the prepasses for a [`Material`].
///
/// This depends on the [`PrepassPipelinePlugin`].
pub struct PrepassPlugin<M: Material>(PhantomData<M>);
pub struct PrepassPlugin<M: Material> {
/// If true, this sets the `COPY_SRC` flag on indirect draw parameters so
/// that they can be read back to CPU.
///
/// This is a debugging feature that may reduce performance. It primarily
/// exists for the `occlusion_culling` example.
pub allow_copies_from_indirect_parameters: bool,
/// Zero-sized marker that ties this plugin to the material type `M`.
pub phantom: PhantomData<M>,
}

impl<M: Material> Default for PrepassPlugin<M> {
fn default() -> Self {
Self(Default::default())
impl<M: Material> PrepassPlugin<M> {
/// Creates a [`PrepassPlugin`] for the material type `M`, storing the given
/// `allow_copies_from_indirect_parameters` flag.
pub fn new(allow_copies_from_indirect_parameters: bool) -> Self {
PrepassPlugin {
allow_copies_from_indirect_parameters,
phantom: PhantomData,
}
}
}

Expand All @@ -176,8 +187,12 @@ where
),
)
.add_plugins((
BinnedRenderPhasePlugin::<Opaque3dPrepass, MeshPipeline>::default(),
BinnedRenderPhasePlugin::<AlphaMask3dPrepass, MeshPipeline>::default(),
BinnedRenderPhasePlugin::<Opaque3dPrepass, MeshPipeline>::new(
self.allow_copies_from_indirect_parameters,
),
BinnedRenderPhasePlugin::<AlphaMask3dPrepass, MeshPipeline>::new(
self.allow_copies_from_indirect_parameters,
),
));
}

Expand Down
Loading

0 comments on commit 08de0fe

Please sign in to comment.