Move ray-traced sun shadows out of shade.comp into dedicated compute passes.

When raytracingSupported && shadingEnabled, the frame now records, in order:
  1. shadow.comp     - one ray query per pixel along a jittered sun direction;
                       writes 1.0 (no hit) or 0.0 (hit) into shadowTarget.
  2. shadowblur.comp - dispatched twice, ping-ponging
                       shadowTarget -> shadowblurTarget -> shadowTarget.
                       The 'direction' push constant is 1 on pass 0 and 0 on
                       pass 1; the shader steps its taps by
                       vec2(direction, 1 - direction) / imageSize.
  3. shade.comp      - samples shadowTarget (binding 4) instead of tracing.

shadowTarget and shadowblurTarget are R8_UNORM storage+sampled images that are
destroyed and re-created in the same place as the gbuffer/depth targets.

Review fix (shadowblur.comp.glsl): the tap weight was exp2(-abs(i) / 10),
which is integer arithmetic. For KERNEL = 10 the exponent truncates to 0 for
every |i| < 10, so the weight was 1.0 everywhere except 0.5 at the two end
taps. It is now computed in float. The change is a same-line edit, so the hunk
line counts are unchanged; the 'index' blob id of that file is left as it was.

NOTE(review): shadowblur.comp.glsl computes znear / texture(depthImage, ...).r
with no guard for a depth sample of 0 - confirm whether such pixels can reach
the blur and whether their result matters.

diff --git a/src/niagara.cpp b/src/niagara.cpp
index 0e0fb8c..565a064 100644
--- a/src/niagara.cpp
+++ b/src/niagara.cpp
@@ -624,6 +624,8 @@ int main(int argc, const char** argv)
 
 	Program blitProgram = createProgram(device, VK_PIPELINE_BIND_POINT_COMPUTE, { &shaders["blit.comp"] }, sizeof(vec4));
 	Program shadeProgram = createProgram(device, VK_PIPELINE_BIND_POINT_COMPUTE, { &shaders["shade.comp"] }, sizeof(ShadeData));
+	Program shadowProgram = createProgram(device, VK_PIPELINE_BIND_POINT_COMPUTE, { &shaders["shadow.comp"] }, sizeof(ShadeData));
+	Program shadowblurProgram = createProgram(device, VK_PIPELINE_BIND_POINT_COMPUTE, { &shaders["shadowblur.comp"] }, sizeof(vec4));
 
 	VkPipeline drawcullPipeline = 0;
 	VkPipeline drawculllatePipeline = 0;
@@ -643,6 +645,8 @@ int main(int argc, const char** argv)
 	VkPipeline clusterpostPipeline = 0;
 	VkPipeline blitPipeline = 0;
 	VkPipeline shadePipeline = 0;
+	VkPipeline shadowPipeline = 0;
+	VkPipeline shadowblurPipeline = 0;
 
 	auto pipelines = [&]()
 	{
@@ -682,6 +686,8 @@ int main(int argc, const char** argv)
 
 		replace(blitPipeline, createComputePipeline(device, pipelineCache, blitProgram));
 		replace(shadePipeline, createComputePipeline(device, pipelineCache, shadeProgram));
+		replace(shadowPipeline, createComputePipeline(device, pipelineCache, shadowProgram));
+		replace(shadowblurPipeline, createComputePipeline(device, pipelineCache, shadowblurProgram));
 	};
 
 	pipelines();
@@ -938,6 +944,8 @@ int main(int argc, const char** argv)
 
 	Image gbufferTargets[gbufferCount] = {};
 	Image depthTarget = {};
+	Image shadowTarget = {};
+	Image shadowblurTarget = {};
 
 	Image depthPyramid = {};
 	VkImageView depthPyramidMips[16] = {};
@@ -1034,10 +1042,18 @@ int main(int argc, const char** argv)
 				destroyImage(depthPyramid, device);
 			}
 
+			if (shadowTarget.image)
+				destroyImage(shadowTarget, device);
+			if (shadowblurTarget.image)
+				destroyImage(shadowblurTarget, device);
+
 			for (uint32_t i = 0; i < gbufferCount; ++i)
 				createImage(gbufferTargets[i], device, memoryProperties, swapchain.width, swapchain.height, 1, gbufferFormats[i], VK_IMAGE_USAGE_COLOR_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
 			createImage(depthTarget, device, memoryProperties, swapchain.width, swapchain.height, 1, depthFormat, VK_IMAGE_USAGE_DEPTH_STENCIL_ATTACHMENT_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
 
+			createImage(shadowTarget, device, memoryProperties, swapchain.width, swapchain.height, 1, VK_FORMAT_R8_UNORM, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
+			createImage(shadowblurTarget, device, memoryProperties, swapchain.width, swapchain.height, 1, VK_FORMAT_R8_UNORM, VK_IMAGE_USAGE_STORAGE_BIT | VK_IMAGE_USAGE_SAMPLED_BIT);
+
 			// Note: previousPow2 makes sure all reductions are at most by 2x2 which makes sure they are conservative
 			depthPyramidWidth = previousPow2(swapchain.width);
 			depthPyramidHeight = previousPow2(swapchain.height);
@@ -1499,19 +1515,85 @@ int main(int argc, const char** argv)
 
 		if (raytracingSupported && shadingEnabled)
 		{
-			vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, shadePipeline);
+			VkImageMemoryBarrier2 preshadowBarrier =
+			    imageBarrier(shadowTarget.image,
+			        0, 0, VK_IMAGE_LAYOUT_UNDEFINED,
+			        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+			pipelineBarrier(commandBuffer, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 1, &preshadowBarrier);
+
+			{
+				vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, shadowPipeline);
+
+				DescriptorInfo descriptors[] = { { shadowTarget.imageView, VK_IMAGE_LAYOUT_GENERAL }, { readSampler, depthTarget.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, tlas };
+				vkCmdPushDescriptorSetWithTemplateKHR(commandBuffer, shadowProgram.updateTemplate, shadowProgram.layout, 0, descriptors);
+
+				ShadeData shadeData = {};
+				shadeData.cameraPosition = camera.position;
+				shadeData.sunDirection = sunDirection;
+				shadeData.inverseViewProjection = inverse(projection * view);
+				shadeData.imageSize = vec2(float(swapchain.width), float(swapchain.height));
+
+				vkCmdPushConstants(commandBuffer, shadowProgram.layout, shadowProgram.pushConstantStages, 0, sizeof(shadeData), &shadeData);
+				vkCmdDispatch(commandBuffer, getGroupCount(swapchain.width, shadowProgram.localSizeX), getGroupCount(swapchain.height, shadowProgram.localSizeY), 1);
+			}
+
+			for (int pass = 0; pass < 2; ++pass)
+			{
+				const Image& blurFrom = pass == 0 ? shadowTarget : shadowblurTarget;
+				const Image& blurTo = pass == 0 ? shadowblurTarget : shadowTarget;
+
+				VkImageMemoryBarrier2 blurToBarrier =
+				    pass == 0
+				        ? imageBarrier(blurTo.image,
+				              0, 0, VK_IMAGE_LAYOUT_UNDEFINED,
+				              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL)
+				        : imageBarrier(blurTo.image,
+				              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL,
+				              VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+				VkImageMemoryBarrier2 blurBarriers[] = {
+					imageBarrier(blurFrom.image,
+					    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL,
+					    VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL),
+					blurToBarrier
+				};
+
+				pipelineBarrier(commandBuffer, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, COUNTOF(blurBarriers), blurBarriers);
 
-			DescriptorInfo descriptors[] = { { swapchainImageViews[imageIndex], VK_IMAGE_LAYOUT_GENERAL }, { readSampler, gbufferTargets[0].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, { readSampler, gbufferTargets[1].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, { readSampler, depthTarget.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, tlas };
-			vkCmdPushDescriptorSetWithTemplateKHR(commandBuffer, shadeProgram.updateTemplate, shadeProgram.layout, 0, descriptors);
+				vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, shadowblurPipeline);
 
-			ShadeData shadeData = {};
-			shadeData.cameraPosition = camera.position;
-			shadeData.sunDirection = sunDirection;
-			shadeData.inverseViewProjection = inverse(projection * view);
-			shadeData.imageSize = vec2(float(swapchain.width), float(swapchain.height));
+				DescriptorInfo descriptors[] = { { blurTo.imageView, VK_IMAGE_LAYOUT_GENERAL }, { readSampler, blurFrom.imageView, VK_IMAGE_LAYOUT_GENERAL }, { readSampler, depthTarget.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL } };
+				vkCmdPushDescriptorSetWithTemplateKHR(commandBuffer, shadowblurProgram.updateTemplate, shadowblurProgram.layout, 0, descriptors);
 
-			vkCmdPushConstants(commandBuffer, shadeProgram.layout, shadeProgram.pushConstantStages, 0, sizeof(shadeData), &shadeData);
-			vkCmdDispatch(commandBuffer, getGroupCount(swapchain.width, shadeProgram.localSizeX), getGroupCount(swapchain.height, shadeProgram.localSizeY), 1);
+				vec4 blurData = vec4(float(swapchain.width), float(swapchain.height), pass == 0 ? 1 : 0, 0);
+
+				vkCmdPushConstants(commandBuffer, shadowblurProgram.layout, shadowblurProgram.pushConstantStages, 0, sizeof(blurData), &blurData);
+				vkCmdDispatch(commandBuffer, getGroupCount(swapchain.width, shadowblurProgram.localSizeX), getGroupCount(swapchain.height, shadowblurProgram.localSizeY), 1);
+			}
+
+			VkImageMemoryBarrier2 shadowblurYBarrier =
+			    imageBarrier(shadowTarget.image,
+			        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_WRITE_BIT, VK_IMAGE_LAYOUT_GENERAL,
+			        VK_PIPELINE_STAGE_COMPUTE_SHADER_BIT, VK_ACCESS_SHADER_READ_BIT, VK_IMAGE_LAYOUT_GENERAL);
+
+			pipelineBarrier(commandBuffer, VK_DEPENDENCY_BY_REGION_BIT, 0, nullptr, 1, &shadowblurYBarrier);
+
+			{
+				vkCmdBindPipeline(commandBuffer, VK_PIPELINE_BIND_POINT_COMPUTE, shadePipeline);
+
+				DescriptorInfo descriptors[] = { { swapchainImageViews[imageIndex], VK_IMAGE_LAYOUT_GENERAL }, { readSampler, gbufferTargets[0].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, { readSampler, gbufferTargets[1].imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, { readSampler, depthTarget.imageView, VK_IMAGE_LAYOUT_SHADER_READ_ONLY_OPTIMAL }, { readSampler, shadowTarget.imageView, VK_IMAGE_LAYOUT_GENERAL } };
+				vkCmdPushDescriptorSetWithTemplateKHR(commandBuffer, shadeProgram.updateTemplate, shadeProgram.layout, 0, descriptors);
+
+				ShadeData shadeData = {};
+				shadeData.cameraPosition = camera.position;
+				shadeData.sunDirection = sunDirection;
+				shadeData.inverseViewProjection = inverse(projection * view);
+				shadeData.imageSize = vec2(float(swapchain.width), float(swapchain.height));
+
+				vkCmdPushConstants(commandBuffer, shadeProgram.layout, shadeProgram.pushConstantStages, 0, sizeof(shadeData), &shadeData);
+				vkCmdDispatch(commandBuffer, getGroupCount(swapchain.width, shadeProgram.localSizeX), getGroupCount(swapchain.height, shadeProgram.localSizeY), 1);
+			}
 		}
 		else
 		{
@@ -1628,6 +1710,11 @@ int main(int argc, const char** argv)
 		destroyImage(depthPyramid, device);
 	}
 
+	if (shadowTarget.image)
+		destroyImage(shadowTarget, device);
+	if (shadowblurTarget.image)
+		destroyImage(shadowblurTarget, device);
+
 	for (uint32_t i = 0; i < swapchain.imageCount; ++i)
 		if (swapchainImageViews[i])
 			vkDestroyImageView(device, swapchainImageViews[i], 0);
@@ -1712,6 +1799,12 @@ int main(int argc, const char** argv)
 	vkDestroyPipeline(device, shadePipeline, 0);
 	destroyProgram(device, shadeProgram);
 
+	vkDestroyPipeline(device, shadowPipeline, 0);
+	destroyProgram(device, shadowProgram);
+
+	vkDestroyPipeline(device, shadowblurPipeline, 0);
+	destroyProgram(device, shadowblurProgram);
+
 	vkDestroyDescriptorSetLayout(device, textureSetLayout, 0);
 
 	for (Shader& shader : shaders.shaders)
diff --git a/src/shaders/shade.comp.glsl b/src/shaders/shade.comp.glsl
index 5c9c7fb..27ab47b 100644
--- a/src/shaders/shade.comp.glsl
+++ b/src/shaders/shade.comp.glsl
@@ -4,8 +4,6 @@
 
 #include "math.h"
 
-#define RAYTRACE 1
-
 layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
 
 struct ShadeData
@@ -24,15 +22,12 @@ layout(push_constant) uniform block
 };
 
 layout(binding = 0) uniform writeonly image2D outImage;
+
 layout(binding = 1) uniform sampler2D gbufferImage0;
 layout(binding = 2) uniform sampler2D gbufferImage1;
 layout(binding = 3) uniform sampler2D depthImage;
 
-#if RAYTRACE
-#extension GL_EXT_ray_query: require
-
-layout(binding = 4) uniform accelerationStructureEXT tlas;
-#endif
+layout(binding = 4) uniform sampler2D shadowImage;
 
 void main()
 {
@@ -61,17 +56,7 @@ void main()
 	// TODO: this is not the BRDF we want
 	float specular = pow(ndoth, mix(1, 64, gloss)) * gloss;
 
-	float shadow = 1;
-
-#if RAYTRACE
-	uint rayflags = gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsCullNoOpaqueEXT;
-
-	rayQueryEXT rq;
-	rayQueryInitializeEXT(rq, tlas, rayflags, 0xff, wpos, 1e-2, shadeData.sunDirection, 1e3);
-	rayQueryProceedEXT(rq);
-
-	shadow = (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionNoneEXT) ? 1.0 : 0.0;
-#endif
+	float shadow = texture(shadowImage, uv).r;
 
 	vec3 outputColor = albedo.rgb * (ndotl * shadow + 0.05) + vec3(specular * shadow) + emissive;
 
diff --git a/src/shaders/shadow.comp.glsl b/src/shaders/shadow.comp.glsl
new file mode 100644
index 0000000..d5523c7
--- /dev/null
+++ b/src/shaders/shadow.comp.glsl
@@ -0,0 +1,59 @@
+#version 460
+
+#extension GL_GOOGLE_include_directive: require
+#extension GL_EXT_ray_query: require
+
+#include "math.h"
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+struct ShadeData
+{
+	vec3 cameraPosition;
+	vec3 sunDirection;
+
+	mat4 inverseViewProjection;
+
+	vec2 imageSize;
+};
+
+layout(push_constant) uniform block
+{
+	ShadeData shadeData;
+};
+
+layout(binding = 0) uniform writeonly image2D outImage;
+
+layout(binding = 1) uniform sampler2D depthImage;
+layout(binding = 2) uniform accelerationStructureEXT tlas;
+
+void main()
+{
+	uvec2 pos = gl_GlobalInvocationID.xy;
+	vec2 uv = (vec2(pos) + 0.5) / shadeData.imageSize;
+
+	float depth = texture(depthImage, uv).r;
+
+	vec4 clip = vec4(uv.x * 2 - 1, 1 - uv.y * 2, depth, 1);
+	vec4 wposh = shadeData.inverseViewProjection * clip;
+	vec3 wpos = wposh.xyz / wposh.w;
+
+	uint rayflags = gl_RayFlagsTerminateOnFirstHitEXT | gl_RayFlagsCullNoOpaqueEXT;
+
+	vec3 dir = shadeData.sunDirection;
+	// TODO: a lot more tuning required here
+	// TODO: this should actually be doing cone sampling, not random XZ offsets
+	float dir0 = gradientNoise(vec2(pos.xy));
+	float dir1 = gradientNoise(vec2(pos.yx));
+	dir.x += (dir0 * 2 - 1) * 1e-2;
+	dir.z += (dir1 * 2 - 1) * 1e-2;
+	dir = normalize(dir);
+
+	rayQueryEXT rq;
+	rayQueryInitializeEXT(rq, tlas, rayflags, 0xff, wpos, 1e-2, dir, 1e3);
+	rayQueryProceedEXT(rq);
+
+	float shadow = (rayQueryGetIntersectionTypeEXT(rq, true) == gl_RayQueryCommittedIntersectionNoneEXT) ? 1.0 : 0.0;
+
+	imageStore(outImage, ivec2(pos), vec4(shadow, 0, 0, 0));
+}
diff --git a/src/shaders/shadowblur.comp.glsl b/src/shaders/shadowblur.comp.glsl
new file mode 100644
index 0000000..7f5663a
--- /dev/null
+++ b/src/shaders/shadowblur.comp.glsl
@@ -0,0 +1,57 @@
+#version 460
+
+#define BLUR 1
+
+#extension GL_GOOGLE_include_directive: require
+
+#include "math.h"
+
+layout(local_size_x = 8, local_size_y = 8, local_size_z = 1) in;
+
+layout(push_constant) uniform block
+{
+	vec2 imageSize;
+	float direction;
+};
+
+layout(binding = 0) uniform writeonly image2D outImage;
+
+layout(binding = 1) uniform sampler2D shadowImage;
+layout(binding = 2) uniform sampler2D depthImage;
+
+void main()
+{
+	uvec2 pos = gl_GlobalInvocationID.xy;
+	vec2 uv = (vec2(pos) + 0.5) / imageSize;
+
+#if BLUR
+	float shadow = 0;
+	float accumw = 0;
+
+	float znear = 1;
+	float depth = znear / texture(depthImage, uv).r;
+
+	vec2 offsetScale = vec2(direction, 1 - direction) / imageSize;
+
+	const int KERNEL = 10;
+
+	for (int i = -KERNEL; i <= KERNEL; ++i)
+	{
+		vec2 uvoff = uv + vec2(i) * offsetScale;
+
+		// TODO: a lot more tuning required here
+		float gw = exp2(-abs(float(i)) / 10.0); // float math on purpose: int -abs(i) / 10 truncates to 0 for |i| < 10
+		float dv = znear / texture(depthImage, uvoff).r;
+		float dw = exp2(-abs(depth - dv) * 20);
+
+		shadow += texture(shadowImage, uvoff).r * (dw * gw);
+		accumw += dw * gw;
+	}
+
+	shadow /= accumw;
+#else
+	float shadow = texture(shadowImage, uv).r;
+#endif
+
+	imageStore(outImage, ivec2(pos), vec4(shadow, 0, 0, 0));
+}