From 092e19bf79986a32fdcb32b3ddec1e7e1b697be0 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Sun, 7 Jul 2024 11:44:40 +0300 Subject: [PATCH 01/12] Make static descriptors allocator a global Makes it easier to grep where it's being used and cleans up ctor interfaces to the bare essentials. --- src/App.cpp | 51 +++++++++-------------- src/App.hpp | 5 --- src/gfx/DescriptorAllocator.cpp | 22 ++++++++-- src/gfx/DescriptorAllocator.hpp | 8 +++- src/main.cpp | 4 ++ src/render/ComputePass.cpp | 12 +++--- src/render/ComputePass.hpp | 4 +- src/render/DebugRenderer.cpp | 11 ++--- src/render/DebugRenderer.hpp | 8 +--- src/render/DeferredShading.cpp | 5 +-- src/render/DeferredShading.hpp | 4 +- src/render/ForwardRenderer.cpp | 11 ++--- src/render/ForwardRenderer.hpp | 8 +--- src/render/GBufferRenderer.cpp | 11 ++--- src/render/GBufferRenderer.hpp | 8 +--- src/render/ImageBasedLighting.cpp | 11 ++--- src/render/ImageBasedLighting.hpp | 4 +- src/render/LightClustering.cpp | 6 +-- src/render/LightClustering.hpp | 4 +- src/render/MeshletCuller.cpp | 12 +++--- src/render/MeshletCuller.hpp | 4 +- src/render/RtReference.cpp | 12 +++--- src/render/RtReference.hpp | 8 +--- src/render/TemporalAntiAliasing.cpp | 6 +-- src/render/TemporalAntiAliasing.hpp | 4 +- src/render/TextureDebug.cpp | 7 +--- src/render/TextureDebug.hpp | 4 +- src/render/TextureReadback.cpp | 7 +--- src/render/TextureReadback.hpp | 4 +- src/render/ToneMap.cpp | 7 +--- src/render/ToneMap.hpp | 4 +- src/render/dof/DepthOfField.cpp | 18 ++++---- src/render/dof/DepthOfField.hpp | 1 - src/render/dof/DepthOfFieldCombine.cpp | 7 +--- src/render/dof/DepthOfFieldCombine.hpp | 4 +- src/render/dof/DepthOfFieldDilate.cpp | 7 +--- src/render/dof/DepthOfFieldDilate.hpp | 4 +- src/render/dof/DepthOfFieldFilter.cpp | 6 +-- src/render/dof/DepthOfFieldFilter.hpp | 4 +- src/render/dof/DepthOfFieldFlatten.cpp | 7 +--- src/render/dof/DepthOfFieldFlatten.hpp | 4 +- src/render/dof/DepthOfFieldGather.cpp | 9 ++-- 
src/render/dof/DepthOfFieldGather.hpp | 4 +- src/render/dof/DepthOfFieldReduce.cpp | 7 +--- src/render/dof/DepthOfFieldReduce.hpp | 4 +- src/render/dof/DepthOfFieldSetup.cpp | 6 +-- src/render/dof/DepthOfFieldSetup.hpp | 4 +- src/render/rtdi/RtDiInitialReservoirs.cpp | 5 +-- src/render/rtdi/RtDiInitialReservoirs.hpp | 4 +- src/render/rtdi/RtDiSpatialReuse.cpp | 5 +-- src/render/rtdi/RtDiSpatialReuse.hpp | 4 +- src/render/rtdi/RtDiTrace.cpp | 13 +++--- src/render/rtdi/RtDiTrace.hpp | 8 +--- src/render/rtdi/RtDirectIllumination.cpp | 26 +++++------- src/render/rtdi/RtDirectIllumination.hpp | 4 +- src/scene/Camera.cpp | 12 ++---- src/scene/Camera.hpp | 8 +--- src/scene/WorldData.cpp | 2 + 58 files changed, 172 insertions(+), 291 deletions(-) diff --git a/src/App.cpp b/src/App.cpp index 66d4e294..1cccdcb4 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -77,7 +77,6 @@ StaticArray allocateCommandBuffers() App::App(std::filesystem::path scenePath) noexcept : m_fileChangePollingAlloc{megabytes(1)} , m_scenePath{WHEELS_MOV(scenePath)} -, m_staticDescriptorsAlloc{OwningPtr{gAllocators.general}} , m_swapchain{OwningPtr{gAllocators.general}} , m_cam{OwningPtr{gAllocators.general}} , m_world{OwningPtr{gAllocators.general}} @@ -116,8 +115,6 @@ App::~App() void App::init(ScopedScratch scopeAlloc) { - m_staticDescriptorsAlloc->init(); - { const SwapchainConfig &config = SwapchainConfig{ scopeAlloc.child_scope(), {gWindow.width(), gWindow.height()}}; @@ -134,62 +131,54 @@ void App::init(ScopedScratch scopeAlloc) vk::BufferUsageFlagBits::eStorageBuffer, asserted_cast(kilobytes(16)), "ConstantsRing"); - m_cam->init( - scopeAlloc.child_scope(), &m_constantsRing, - m_staticDescriptorsAlloc.get()); + m_cam->init(scopeAlloc.child_scope(), &m_constantsRing); m_world->init(scopeAlloc.child_scope(), &m_constantsRing, m_scenePath); const Timer gpuPassesInitTimer; m_lightClustering->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_cam->descriptorSetLayout(), 
m_world->dsLayouts()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), + m_world->dsLayouts()); m_forwardRenderer->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), + scopeAlloc.child_scope(), ForwardRenderer::InputDSLayouts{ .camera = m_cam->descriptorSetLayout(), .lightClusters = m_lightClustering->descriptorSetLayout(), .world = m_world->dsLayouts(), }); m_gbufferRenderer->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_cam->descriptorSetLayout(), m_world->dsLayouts()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), + m_world->dsLayouts()); m_deferredShading->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), + scopeAlloc.child_scope(), DeferredShading::InputDSLayouts{ .camera = m_cam->descriptorSetLayout(), .lightClusters = m_lightClustering->descriptorSetLayout(), .world = m_world->dsLayouts(), }); m_rtDirectIllumination->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_cam->descriptorSetLayout(), m_world->dsLayouts()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), + m_world->dsLayouts()); m_rtReference->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_cam->descriptorSetLayout(), m_world->dsLayouts()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), + m_world->dsLayouts()); m_skyboxRenderer->init( scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), m_world->dsLayouts()); m_debugRenderer->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_cam->descriptorSetLayout()); - m_toneMap->init(scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout()); + m_toneMap->init(scopeAlloc.child_scope()); m_imguiRenderer->init(m_swapchain->config()); - m_textureDebug->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get()); + m_textureDebug->init(scopeAlloc.child_scope()); m_depthOfField->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), 
- m_cam->descriptorSetLayout()); - m_imageBasedLighting->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout()); + m_imageBasedLighting->init(scopeAlloc.child_scope()); m_temporalAntiAliasing->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_cam->descriptorSetLayout()); + scopeAlloc.child_scope(), m_cam->descriptorSetLayout()); m_meshletCuller->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get(), - m_world->dsLayouts(), m_cam->descriptorSetLayout()); - m_textureReadback->init( - scopeAlloc.child_scope(), m_staticDescriptorsAlloc.get()); + scopeAlloc.child_scope(), m_world->dsLayouts(), + m_cam->descriptorSetLayout()); + m_textureReadback->init(scopeAlloc.child_scope()); m_recompileTime = std::chrono::file_clock::now(); LOG_INFO("GPU pass init took %.2fs", gpuPassesInitTimer.getSeconds()); diff --git a/src/App.hpp b/src/App.hpp index e911595e..87e7782c 100644 --- a/src/App.hpp +++ b/src/App.hpp @@ -109,11 +109,6 @@ class App wheels::TlsfAllocator m_fileChangePollingAlloc; std::filesystem::path m_scenePath; - // This allocator should only be used for the descriptors that can live - // until the end of the program. As such, reset() shouldn't be called so - // that users can rely on the descriptors being there once allocated. 
- wheels::OwningPtr m_staticDescriptorsAlloc; - wheels::OwningPtr m_swapchain; wheels::StaticArray m_commandBuffers; diff --git a/src/gfx/DescriptorAllocator.cpp b/src/gfx/DescriptorAllocator.cpp index 46a73b88..4aa0c8a9 100644 --- a/src/gfx/DescriptorAllocator.cpp +++ b/src/gfx/DescriptorAllocator.cpp @@ -69,12 +69,14 @@ constexpr StaticArray sDefaultPoolSizes{{ } // namespace +// This is used everywhere and init()/destroy() order relative to other similar +// globals is handled in main() +// NOLINTNEXTLINE(cppcoreguidelines-avoid-non-const-global-variables) +DescriptorAllocator gStaticDescriptorsAlloc; + DescriptorAllocator::~DescriptorAllocator() { - // Don't check for m_initialized as we might be cleaning up after a failed - // init. - for (const vk::DescriptorPool p : m_pools) - gDevice.logical().destroy(p); + WHEELS_ASSERT(!m_initialized && "destroy() not called?"); } void DescriptorAllocator::init(vk::DescriptorPoolCreateFlags flags) @@ -88,6 +90,18 @@ void DescriptorAllocator::init(vk::DescriptorPoolCreateFlags flags) m_initialized = true; } +void DescriptorAllocator::destroy() +{ + // Don't check for m_initialized as we might be cleaning up after a failed + // init. 
+ for (const vk::DescriptorPool p : m_pools) + gDevice.logical().destroy(p); + + m_pools.~Array(); + + m_initialized = false; +} + void DescriptorAllocator::resetPools() { WHEELS_ASSERT(m_initialized); diff --git a/src/gfx/DescriptorAllocator.hpp b/src/gfx/DescriptorAllocator.hpp index 05bbe804..e9a7bc7a 100644 --- a/src/gfx/DescriptorAllocator.hpp +++ b/src/gfx/DescriptorAllocator.hpp @@ -24,9 +24,9 @@ class DescriptorAllocator DescriptorAllocator &operator=(DescriptorAllocator const &) = delete; DescriptorAllocator &operator=(DescriptorAllocator &&) = delete; - // device needs to live as long as this void init( vk::DescriptorPoolCreateFlags flags = vk::DescriptorPoolCreateFlags{}); + void destroy(); // Reset frees all allocated descriptors/sets and makes the pools available // for new allocations @@ -51,10 +51,16 @@ class DescriptorAllocator wheels::Span debugNames, wheels::Span output, const void *allocatePNext); + // Any dynamic allocations need to be manually destroyed in destroy() bool m_initialized{false}; int32_t m_activePool{-1}; wheels::Array m_pools{gAllocators.general}; vk::DescriptorPoolCreateFlags m_flags; }; +// This allocator should only be used for the descriptors that can live +// until the end of the program. As such, reset() shouldn't be called so +// that users can rely on the descriptors being there once allocated. 
+extern DescriptorAllocator gStaticDescriptorsAlloc; + #endif // PROSPER_GFX_DESCRIPTOR_ALLOCATOR_HPP diff --git a/src/main.cpp b/src/main.cpp index 0208b964..3f9ef287 100644 --- a/src/main.cpp +++ b/src/main.cpp @@ -10,6 +10,7 @@ #include "Allocators.hpp" #include "App.hpp" #include "Window.hpp" +#include "gfx/DescriptorAllocator.hpp" #include "gfx/Device.hpp" #include "render/RenderResources.hpp" #include "utils/Logger.hpp" @@ -191,6 +192,9 @@ int main(int argc, char *argv[]) gRenderResources.init(); defer { gRenderResources.destroy(); }; + gStaticDescriptorsAlloc.init(); + defer { gStaticDescriptorsAlloc.destroy(); }; + gProfiler.init(); defer { gProfiler.destroy(); }; diff --git a/src/render/ComputePass.cpp b/src/render/ComputePass.cpp index 1d8f47aa..a4528075 100644 --- a/src/render/ComputePass.cpp +++ b/src/render/ComputePass.cpp @@ -29,12 +29,10 @@ ComputePass::~ComputePass() void ComputePass::init( wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, const std::function &shaderDefinitionCallback, const ComputePassOptions &options) { WHEELS_ASSERT(!m_initialized); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); WHEELS_ASSERT( (options.storageSetIndex == options.externalDsLayouts.size()) && "Implementation assumes that the pass storage set is the last set and " @@ -51,8 +49,8 @@ void ComputePass::init( throw std::runtime_error("Shader compilation failed"); createDescriptorSets( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - shader.debugName.c_str(), options.storageStageFlags); + scopeAlloc.child_scope(), shader.debugName.c_str(), + options.storageStageFlags); createPipeline( scopeAlloc.child_scope(), options.externalDsLayouts, shader.debugName); @@ -244,8 +242,8 @@ void ComputePass::destroyPipelines() } void ComputePass::createDescriptorSets( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const char *debugName, vk::ShaderStageFlags storageStageFlags) + ScopedScratch scopeAlloc, const char 
*debugName, + vk::ShaderStageFlags storageStageFlags) { WHEELS_ASSERT(m_shaderReflection.has_value()); m_storageSetLayout = m_shaderReflection->createDescriptorSetLayout( @@ -257,7 +255,7 @@ void ComputePass::createDescriptorSets( InlineArray debugNames; layouts.resize(sets.size(), m_storageSetLayout); debugNames.resize(sets.size(), debugName); - staticDescriptorsAlloc->allocate(layouts, debugNames, sets.mut_span()); + gStaticDescriptorsAlloc.allocate(layouts, debugNames, sets.mut_span()); } } diff --git a/src/render/ComputePass.hpp b/src/render/ComputePass.hpp index 32101b41..cd7b16d4 100644 --- a/src/render/ComputePass.hpp +++ b/src/render/ComputePass.hpp @@ -43,7 +43,6 @@ class ComputePass void init( wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, const std::function &shaderDefinitionCallback, const ComputePassOptions &options = ComputePassOptions{}); @@ -109,8 +108,7 @@ class ComputePass void destroyPipelines(); void createDescriptorSets( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, const char *, + wheels::ScopedScratch scopeAlloc, const char *, vk::ShaderStageFlags storageStageFlags); void createPipeline( diff --git a/src/render/DebugRenderer.cpp b/src/render/DebugRenderer.cpp index 4beefa56..3278e490 100644 --- a/src/render/DebugRenderer.cpp +++ b/src/render/DebugRenderer.cpp @@ -50,11 +50,9 @@ DebugRenderer::~DebugRenderer() } void DebugRenderer::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const vk::DescriptorSetLayout camDSLayout) + ScopedScratch scopeAlloc, const vk::DescriptorSetLayout camDSLayout) { WHEELS_ASSERT(!m_initialized); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); LOG_INFO("Creating DebugRenderer"); @@ -77,7 +75,7 @@ void DebugRenderer::init( }), }; - createDescriptorSets(scopeAlloc.child_scope(), staticDescriptorsAlloc); + createDescriptorSets(scopeAlloc.child_scope()); createGraphicsPipeline(camDSLayout); m_initialized = true; @@ 
-238,8 +236,7 @@ void DebugRenderer::destroyGraphicsPipeline() gDevice.logical().destroy(m_pipelineLayout); } -void DebugRenderer::createDescriptorSets( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DebugRenderer::createDescriptorSets(ScopedScratch scopeAlloc) { WHEELS_ASSERT(m_vertReflection.has_value()); m_linesDSLayout = m_vertReflection->createDescriptorSetLayout( @@ -250,7 +247,7 @@ void DebugRenderer::createDescriptorSets( m_linesDSLayout}; const StaticArray debugNames{ "DebugRenderer"}; - staticDescriptorsAlloc->allocate( + gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_linesDescriptorSets.mut_span()); for (size_t i = 0; i < m_linesDescriptorSets.size(); ++i) diff --git a/src/render/DebugRenderer.hpp b/src/render/DebugRenderer.hpp index fd2dfdf4..8d32b069 100644 --- a/src/render/DebugRenderer.hpp +++ b/src/render/DebugRenderer.hpp @@ -25,9 +25,7 @@ class DebugRenderer DebugRenderer &operator=(DebugRenderer &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout); + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout); void recompileShaders( wheels::ScopedScratch scopeAlloc, @@ -49,9 +47,7 @@ class DebugRenderer void destroyGraphicsPipeline(); - void createDescriptorSets( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void createDescriptorSets(wheels::ScopedScratch scopeAlloc); void createGraphicsPipeline(vk::DescriptorSetLayout camDSLayout); bool m_initialized{false}; diff --git a/src/render/DeferredShading.cpp b/src/render/DeferredShading.cpp index c1920883..14c5f8e4 100644 --- a/src/render/DeferredShading.cpp +++ b/src/render/DeferredShading.cpp @@ -88,13 +88,12 @@ StaticArray externalDsLayouts( } // namespace void DeferredShading::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts) + ScopedScratch 
scopeAlloc, const InputDSLayouts &dsLayouts) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, + WHEELS_MOV(scopeAlloc), [&dsLayouts](Allocator &alloc) { return shaderDefinitionCallback(alloc, dsLayouts.world); }, ComputePassOptions{ diff --git a/src/render/DeferredShading.hpp b/src/render/DeferredShading.hpp index 1e95fc87..313a28cf 100644 --- a/src/render/DeferredShading.hpp +++ b/src/render/DeferredShading.hpp @@ -32,9 +32,7 @@ class DeferredShading const WorldDSLayouts &world; }; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts); + wheels::ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/ForwardRenderer.cpp b/src/render/ForwardRenderer.cpp index dc4447c9..5de5dc6a 100644 --- a/src/render/ForwardRenderer.cpp +++ b/src/render/ForwardRenderer.cpp @@ -63,18 +63,16 @@ ForwardRenderer::~ForwardRenderer() } void ForwardRenderer::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts) + ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts) { WHEELS_ASSERT(!m_initialized); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); LOG_INFO("Creating ForwardRenderer"); if (!compileShaders(scopeAlloc.child_scope(), dsLayouts.world)) throw std::runtime_error("ForwardRenderer shader compilation failed"); - createDescriptorSets(scopeAlloc.child_scope(), staticDescriptorsAlloc); + createDescriptorSets(scopeAlloc.child_scope()); createGraphicsPipelines(dsLayouts); m_initialized = true; @@ -252,8 +250,7 @@ bool ForwardRenderer::compileShaders( return false; } -void ForwardRenderer::createDescriptorSets( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void ForwardRenderer::createDescriptorSets(ScopedScratch scopeAlloc) { WHEELS_ASSERT(m_meshReflection.has_value()); 
m_meshSetLayout = m_meshReflection->createDescriptorSetLayout( @@ -264,7 +261,7 @@ void ForwardRenderer::createDescriptorSets( layouts{m_meshSetLayout}; const StaticArray debugNames{ "ForwardMesh"}; - staticDescriptorsAlloc->allocate( + gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_meshSets.mut_span()); } diff --git a/src/render/ForwardRenderer.hpp b/src/render/ForwardRenderer.hpp index 17c7e9e7..44e3dbe2 100644 --- a/src/render/ForwardRenderer.hpp +++ b/src/render/ForwardRenderer.hpp @@ -31,9 +31,7 @@ class ForwardRenderer const WorldDSLayouts &world; }; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts); + wheels::ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts); void recompileShaders( wheels::ScopedScratch scopeAlloc, @@ -70,9 +68,7 @@ class ForwardRenderer [[nodiscard]] bool compileShaders( wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts); - void createDescriptorSets( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void createDescriptorSets(wheels::ScopedScratch scopeAlloc); void updateDescriptorSet( wheels::ScopedScratch scopeAlloc, uint32_t nextFrame, bool transparents, diff --git a/src/render/GBufferRenderer.cpp b/src/render/GBufferRenderer.cpp index 66199c62..9b099ab7 100644 --- a/src/render/GBufferRenderer.cpp +++ b/src/render/GBufferRenderer.cpp @@ -54,19 +54,17 @@ struct Attachments } // namespace void GBufferRenderer::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const vk::DescriptorSetLayout camDSLayout, + ScopedScratch scopeAlloc, const vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts) { WHEELS_ASSERT(!m_initialized); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); LOG_INFO("Creating GBufferRenderer"); if (!compileShaders(scopeAlloc.child_scope(), worldDSLayouts)) throw std::runtime_error("GBufferRenderer shader compilation 
failed"); - createDescriptorSets(scopeAlloc.child_scope(), staticDescriptorsAlloc); + createDescriptorSets(scopeAlloc.child_scope()); createGraphicsPipelines(camDSLayout, worldDSLayouts); m_initialized = true; @@ -377,8 +375,7 @@ bool GBufferRenderer::compileShaders( return false; } -void GBufferRenderer::createDescriptorSets( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void GBufferRenderer::createDescriptorSets(ScopedScratch scopeAlloc) { WHEELS_ASSERT(m_meshReflection.has_value()); m_meshSetLayout = m_meshReflection->createDescriptorSetLayout( @@ -389,7 +386,7 @@ void GBufferRenderer::createDescriptorSets( m_meshSetLayout}; const StaticArray debugNames{ "GBufferMesh"}; - staticDescriptorsAlloc->allocate( + gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_meshSets.mut_span()); } diff --git a/src/render/GBufferRenderer.hpp b/src/render/GBufferRenderer.hpp index 5cbbabd5..e447aa27 100644 --- a/src/render/GBufferRenderer.hpp +++ b/src/render/GBufferRenderer.hpp @@ -32,9 +32,7 @@ class GBufferRenderer GBufferRenderer &operator=(GBufferRenderer &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts); void recompileShaders( @@ -53,9 +51,7 @@ class GBufferRenderer [[nodiscard]] bool compileShaders( wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts); - void createDescriptorSets( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void createDescriptorSets(wheels::ScopedScratch scopeAlloc); void updateDescriptorSet( wheels::ScopedScratch scopeAlloc, uint32_t nextFrame, const MeshletCullerOutput &cullerOutput, BufferHandle inOutDrawStats); diff --git a/src/render/ImageBasedLighting.cpp b/src/render/ImageBasedLighting.cpp index 16ca565d..a1213ec7 100644 --- 
a/src/render/ImageBasedLighting.cpp +++ b/src/render/ImageBasedLighting.cpp @@ -65,20 +65,17 @@ ComputePass::Shader prefilterRadianceShaderDefinitionCallback(Allocator &alloc) } // namespace -void ImageBasedLighting::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void ImageBasedLighting::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); m_sampleIrradiance.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - sampleIrradianceShaderDefinitionCallback); + scopeAlloc.child_scope(), sampleIrradianceShaderDefinitionCallback); m_integrateSpecularBrdf.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, + scopeAlloc.child_scope(), integrateSpecularBrdfShaderDefinitionCallback); m_prefilterRadiance.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - prefilterRadianceShaderDefinitionCallback); + scopeAlloc.child_scope(), prefilterRadianceShaderDefinitionCallback); m_initialized = true; } diff --git a/src/render/ImageBasedLighting.hpp b/src/render/ImageBasedLighting.hpp index 89cf0392..25567529 100644 --- a/src/render/ImageBasedLighting.hpp +++ b/src/render/ImageBasedLighting.hpp @@ -20,9 +20,7 @@ class ImageBasedLighting ImageBasedLighting &operator=(const ImageBasedLighting &other) = delete; ImageBasedLighting &operator=(ImageBasedLighting &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); [[nodiscard]] bool isGenerated() const; diff --git a/src/render/LightClustering.cpp b/src/render/LightClustering.cpp index 8e29c648..0c52f5c8 100644 --- a/src/render/LightClustering.cpp +++ b/src/render/LightClustering.cpp @@ -125,15 +125,13 @@ LightClusteringOutput createOutputs(const vk::Extent2D &renderExtent) } // namespace void LightClustering::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const vk::DescriptorSetLayout camDSLayout, + ScopedScratch scopeAlloc, const 
vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback, + WHEELS_MOV(scopeAlloc), shaderDefinitionCallback, ComputePassOptions{ .storageSetIndex = LightClustersBindingSet, .externalDsLayouts = externalDsLayouts(camDSLayout, worldDSLayouts), diff --git a/src/render/LightClustering.hpp b/src/render/LightClustering.hpp index 25f1f144..1643afb5 100644 --- a/src/render/LightClustering.hpp +++ b/src/render/LightClustering.hpp @@ -38,9 +38,7 @@ class LightClustering LightClustering &operator=(LightClustering &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts); [[nodiscard]] vk::DescriptorSetLayout descriptorSetLayout() const; diff --git a/src/render/MeshletCuller.cpp b/src/render/MeshletCuller.cpp index e4f0841c..ead54407 100644 --- a/src/render/MeshletCuller.cpp +++ b/src/render/MeshletCuller.cpp @@ -138,13 +138,13 @@ cullerExternalDsLayouts( } // namespace void MeshletCuller::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const WorldDSLayouts &worldDsLayouts, vk::DescriptorSetLayout camDsLayout) + ScopedScratch scopeAlloc, const WorldDSLayouts &worldDsLayouts, + vk::DescriptorSetLayout camDsLayout) { WHEELS_ASSERT(!m_initialized); m_drawListGenerator.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, + scopeAlloc.child_scope(), [&worldDsLayouts](Allocator &alloc) { return generatorDefinitionCallback(alloc, worldDsLayouts); }, ComputePassOptions{ @@ -153,14 +153,12 @@ void MeshletCuller::init( .externalDsLayouts = generatorExternalDsLayouts(worldDsLayouts), }); m_cullerArgumentsWriter.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - 
argumentsWriterDefinitionCallback, + scopeAlloc.child_scope(), argumentsWriterDefinitionCallback, ComputePassOptions{ .perFrameRecordLimit = sMaxRecordsPerFrame, }); m_drawListCuller.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - cullerDefinitionCallback, + WHEELS_MOV(scopeAlloc), cullerDefinitionCallback, ComputePassOptions{ .storageSetIndex = CullerStorageBindingSet, .perFrameRecordLimit = sMaxRecordsPerFrame, diff --git a/src/render/MeshletCuller.hpp b/src/render/MeshletCuller.hpp index c99aa2aa..b27f5a9b 100644 --- a/src/render/MeshletCuller.hpp +++ b/src/render/MeshletCuller.hpp @@ -33,9 +33,7 @@ class MeshletCuller MeshletCuller &operator=(MeshletCuller &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - const WorldDSLayouts &worldDsLayouts, + wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDsLayouts, vk::DescriptorSetLayout camDsLayout); void recompileShaders( diff --git a/src/render/RtReference.cpp b/src/render/RtReference.cpp index f50347c7..4c8b6325 100644 --- a/src/render/RtReference.cpp +++ b/src/render/RtReference.cpp @@ -109,18 +109,17 @@ RtReference::~RtReference() } void RtReference::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts) + ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, + const WorldDSLayouts &worldDSLayouts) { WHEELS_ASSERT(!m_initialized); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); LOG_INFO("Creating RtReference"); if (!compileShaders(scopeAlloc.child_scope(), worldDSLayouts)) throw std::runtime_error("RtReference shader compilation failed"); - createDescriptorSets(scopeAlloc.child_scope(), staticDescriptorsAlloc); + createDescriptorSets(scopeAlloc.child_scope()); createPipeline(camDSLayout, worldDSLayouts); createShaderBindingTable(scopeAlloc.child_scope()); @@ -563,8 +562,7 @@ bool RtReference::compileShaders( return 
false; } -void RtReference::createDescriptorSets( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void RtReference::createDescriptorSets(ScopedScratch scopeAlloc) { m_descriptorSetLayout = m_raygenReflection->createDescriptorSetLayout( WHEELS_MOV(scopeAlloc), OutputBindingSet, @@ -574,7 +572,7 @@ void RtReference::createDescriptorSets( m_descriptorSetLayout}; const StaticArray debugNames{ "RtReference"}; - staticDescriptorsAlloc->allocate( + gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_descriptorSets.mut_span()); } diff --git a/src/render/RtReference.hpp b/src/render/RtReference.hpp index 5a050e5a..be900281 100644 --- a/src/render/RtReference.hpp +++ b/src/render/RtReference.hpp @@ -28,9 +28,7 @@ class RtReference RtReference &operator=(RtReference &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts); void recompileShaders( @@ -65,9 +63,7 @@ class RtReference [[nodiscard]] bool compileShaders( wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts); - void createDescriptorSets( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void createDescriptorSets(wheels::ScopedScratch scopeAlloc); void updateDescriptorSet( wheels::ScopedScratch scopeAlloc, uint32_t nextFrame, ImageHandle illumination); diff --git a/src/render/TemporalAntiAliasing.cpp b/src/render/TemporalAntiAliasing.cpp index 515c6195..6bfa8b76 100644 --- a/src/render/TemporalAntiAliasing.cpp +++ b/src/render/TemporalAntiAliasing.cpp @@ -98,14 +98,12 @@ ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) } // namespace void TemporalAntiAliasing::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDsLayout) + ScopedScratch scopeAlloc, 
vk::DescriptorSetLayout camDsLayout) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback, + WHEELS_MOV(scopeAlloc), shaderDefinitionCallback, ComputePassOptions{ .storageSetIndex = StorageBindingSet, .externalDsLayouts = Span{&camDsLayout, 1}, diff --git a/src/render/TemporalAntiAliasing.hpp b/src/render/TemporalAntiAliasing.hpp index 99994149..89aee08c 100644 --- a/src/render/TemporalAntiAliasing.hpp +++ b/src/render/TemporalAntiAliasing.hpp @@ -48,9 +48,7 @@ class TemporalAntiAliasing TemporalAntiAliasing &operator=(TemporalAntiAliasing &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDsLayout); + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDsLayout); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/TextureDebug.cpp b/src/render/TextureDebug.cpp index 03248b6f..9fccb8b1 100644 --- a/src/render/TextureDebug.cpp +++ b/src/render/TextureDebug.cpp @@ -78,14 +78,11 @@ TextureDebug::~TextureDebug() gDevice.destroy(b); } -void TextureDebug::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void TextureDebug::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); for (Buffer &b : m_readbackBuffers) { diff --git a/src/render/TextureDebug.hpp b/src/render/TextureDebug.hpp index bbe8edbc..1a4612d7 100644 --- a/src/render/TextureDebug.hpp +++ b/src/render/TextureDebug.hpp @@ -42,9 +42,7 @@ class TextureDebug TextureDebug &operator=(const TextureDebug &other) = delete; TextureDebug &operator=(TextureDebug &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch 
scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/TextureReadback.cpp b/src/render/TextureReadback.cpp index 8b9bd60b..f4bad54a 100644 --- a/src/render/TextureReadback.cpp +++ b/src/render/TextureReadback.cpp @@ -32,14 +32,11 @@ TextureReadback::~TextureReadback() gDevice.destroy(m_buffer); } -void TextureReadback::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void TextureReadback::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); m_buffer = gDevice.createBuffer(BufferCreateInfo{ .desc = BufferDescription{ diff --git a/src/render/TextureReadback.hpp b/src/render/TextureReadback.hpp index 73724529..c7655bb7 100644 --- a/src/render/TextureReadback.hpp +++ b/src/render/TextureReadback.hpp @@ -23,9 +23,7 @@ class TextureReadback TextureReadback &operator=(const TextureReadback &other) = delete; TextureReadback &operator=(TextureReadback &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void startFrame(); diff --git a/src/render/ToneMap.cpp b/src/render/ToneMap.cpp index aa63bca3..75721f9e 100644 --- a/src/render/ToneMap.cpp +++ b/src/render/ToneMap.cpp @@ -33,14 +33,11 @@ ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) } // namespace -void ToneMap::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void ToneMap::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - m_computePass.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(scopeAlloc.child_scope(), shaderDefinitionCallback); m_lut.init( WHEELS_MOV(scopeAlloc), resPath("texture/tony_mc_mapface.dds"), diff --git 
a/src/render/ToneMap.hpp b/src/render/ToneMap.hpp index f98c3d7e..e50889cf 100644 --- a/src/render/ToneMap.hpp +++ b/src/render/ToneMap.hpp @@ -23,9 +23,7 @@ class ToneMap ToneMap &operator=(const ToneMap &other) = delete; ToneMap &operator=(ToneMap &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfField.cpp b/src/render/dof/DepthOfField.cpp index ff5fabb7..c47181cf 100644 --- a/src/render/dof/DepthOfField.cpp +++ b/src/render/dof/DepthOfField.cpp @@ -6,19 +6,17 @@ using namespace wheels; void DepthOfField::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout cameraDsLayout) + ScopedScratch scopeAlloc, vk::DescriptorSetLayout cameraDsLayout) { WHEELS_ASSERT(!m_initialized); - m_setupPass.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, cameraDsLayout); - m_reducePass.init(scopeAlloc.child_scope(), staticDescriptorsAlloc); - m_flattenPass.init(scopeAlloc.child_scope(), staticDescriptorsAlloc); - m_dilatePass.init(scopeAlloc.child_scope(), staticDescriptorsAlloc); - m_gatherPass.init(scopeAlloc.child_scope(), staticDescriptorsAlloc); - m_filterPass.init(scopeAlloc.child_scope(), staticDescriptorsAlloc); - m_combinePass.init(scopeAlloc.child_scope(), staticDescriptorsAlloc); + m_setupPass.init(scopeAlloc.child_scope(), cameraDsLayout); + m_reducePass.init(scopeAlloc.child_scope()); + m_flattenPass.init(scopeAlloc.child_scope()); + m_dilatePass.init(scopeAlloc.child_scope()); + m_gatherPass.init(scopeAlloc.child_scope()); + m_filterPass.init(scopeAlloc.child_scope()); + m_combinePass.init(scopeAlloc.child_scope()); m_initialized = true; } diff --git a/src/render/dof/DepthOfField.hpp b/src/render/dof/DepthOfField.hpp index 8fa39935..f573c4fc 100644 --- a/src/render/dof/DepthOfField.hpp +++ 
b/src/render/dof/DepthOfField.hpp @@ -38,7 +38,6 @@ class DepthOfField void init( wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, vk::DescriptorSetLayout cameraDsLayout); void recompileShaders( diff --git a/src/render/dof/DepthOfFieldCombine.cpp b/src/render/dof/DepthOfFieldCombine.cpp index 09fe3562..aba2af8a 100644 --- a/src/render/dof/DepthOfFieldCombine.cpp +++ b/src/render/dof/DepthOfFieldCombine.cpp @@ -26,14 +26,11 @@ ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) } // namespace -void DepthOfFieldCombine::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DepthOfFieldCombine::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); m_initialized = true; } diff --git a/src/render/dof/DepthOfFieldCombine.hpp b/src/render/dof/DepthOfFieldCombine.hpp index 49574f3b..5b684eae 100644 --- a/src/render/dof/DepthOfFieldCombine.hpp +++ b/src/render/dof/DepthOfFieldCombine.hpp @@ -27,9 +27,7 @@ class DepthOfFieldCombine DepthOfFieldCombine &operator=(const DepthOfFieldCombine &other) = delete; DepthOfFieldCombine &operator=(DepthOfFieldCombine &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfFieldDilate.cpp b/src/render/dof/DepthOfFieldDilate.cpp index 62fe3225..76065f15 100644 --- a/src/render/dof/DepthOfFieldDilate.cpp +++ b/src/render/dof/DepthOfFieldDilate.cpp @@ -34,14 +34,11 @@ struct PcBlock } // namespace -void DepthOfFieldDilate::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DepthOfFieldDilate::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - 
m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); m_initialized = true; } diff --git a/src/render/dof/DepthOfFieldDilate.hpp b/src/render/dof/DepthOfFieldDilate.hpp index 0f6db819..f6ea2624 100644 --- a/src/render/dof/DepthOfFieldDilate.hpp +++ b/src/render/dof/DepthOfFieldDilate.hpp @@ -27,9 +27,7 @@ class DepthOfFieldDilate DepthOfFieldDilate &operator=(const DepthOfFieldDilate &other) = delete; DepthOfFieldDilate &operator=(DepthOfFieldDilate &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfFieldFilter.cpp b/src/render/dof/DepthOfFieldFilter.cpp index d2d5f40e..a53fc088 100644 --- a/src/render/dof/DepthOfFieldFilter.cpp +++ b/src/render/dof/DepthOfFieldFilter.cpp @@ -21,14 +21,12 @@ ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) } // namespace -void DepthOfFieldFilter::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DepthOfFieldFilter::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback, + WHEELS_MOV(scopeAlloc), shaderDefinitionCallback, ComputePassOptions{ .perFrameRecordLimit = 2, }); diff --git a/src/render/dof/DepthOfFieldFilter.hpp b/src/render/dof/DepthOfFieldFilter.hpp index 606822be..0018b3d1 100644 --- a/src/render/dof/DepthOfFieldFilter.hpp +++ b/src/render/dof/DepthOfFieldFilter.hpp @@ -21,9 +21,7 @@ class DepthOfFieldFilter DepthOfFieldFilter &operator=(const DepthOfFieldFilter &other) = delete; DepthOfFieldFilter &operator=(DepthOfFieldFilter &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void 
init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfFieldFlatten.cpp b/src/render/dof/DepthOfFieldFlatten.cpp index 08be760c..e7d1c510 100644 --- a/src/render/dof/DepthOfFieldFlatten.cpp +++ b/src/render/dof/DepthOfFieldFlatten.cpp @@ -28,14 +28,11 @@ ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) } // namespace -void DepthOfFieldFlatten::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DepthOfFieldFlatten::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); m_initialized = true; } diff --git a/src/render/dof/DepthOfFieldFlatten.hpp b/src/render/dof/DepthOfFieldFlatten.hpp index 7d220bc7..877cd93b 100644 --- a/src/render/dof/DepthOfFieldFlatten.hpp +++ b/src/render/dof/DepthOfFieldFlatten.hpp @@ -28,9 +28,7 @@ class DepthOfFieldFlatten DepthOfFieldFlatten &operator=(const DepthOfFieldFlatten &other) = delete; DepthOfFieldFlatten &operator=(DepthOfFieldFlatten &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfFieldGather.cpp b/src/render/dof/DepthOfFieldGather.cpp index d9cf7e2f..5c5bc7f9 100644 --- a/src/render/dof/DepthOfFieldGather.cpp +++ b/src/render/dof/DepthOfFieldGather.cpp @@ -47,17 +47,14 @@ ComputePass::Shader foregroundDefinitionCallback(Allocator &alloc) } // namespace -void DepthOfFieldGather::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DepthOfFieldGather::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); m_backgroundPass.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - 
backgroundDefinitionCallback); + scopeAlloc.child_scope(), backgroundDefinitionCallback); m_foregroundPass.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - foregroundDefinitionCallback); + scopeAlloc.child_scope(), foregroundDefinitionCallback); m_initialized = true; } diff --git a/src/render/dof/DepthOfFieldGather.hpp b/src/render/dof/DepthOfFieldGather.hpp index f45bf237..8d060ad9 100644 --- a/src/render/dof/DepthOfFieldGather.hpp +++ b/src/render/dof/DepthOfFieldGather.hpp @@ -33,9 +33,7 @@ class DepthOfFieldGather DepthOfFieldGather &operator=(const DepthOfFieldGather &other) = delete; DepthOfFieldGather &operator=(DepthOfFieldGather &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfFieldReduce.cpp b/src/render/dof/DepthOfFieldReduce.cpp index 8e2c5f3b..3f3a64c5 100644 --- a/src/render/dof/DepthOfFieldReduce.cpp +++ b/src/render/dof/DepthOfFieldReduce.cpp @@ -60,14 +60,11 @@ DepthOfFieldReduce::~DepthOfFieldReduce() gDevice.destroy(m_atomicCounter); } -void DepthOfFieldReduce::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void DepthOfFieldReduce::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback); + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); // Don't use a shared resource as this is tiny and the clear can be skipped // after the first frame if we know nothing else uses it. 
m_atomicCounter = gDevice.createBuffer(BufferCreateInfo{ diff --git a/src/render/dof/DepthOfFieldReduce.hpp b/src/render/dof/DepthOfFieldReduce.hpp index 960bcff1..d9ce190f 100644 --- a/src/render/dof/DepthOfFieldReduce.hpp +++ b/src/render/dof/DepthOfFieldReduce.hpp @@ -27,9 +27,7 @@ class DepthOfFieldReduce DepthOfFieldReduce &operator=(const DepthOfFieldReduce &other) = delete; DepthOfFieldReduce &operator=(DepthOfFieldReduce &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc); void recompileShaders( wheels::ScopedScratch scopeAlloc, diff --git a/src/render/dof/DepthOfFieldSetup.cpp b/src/render/dof/DepthOfFieldSetup.cpp index 2da2cced..fffffc96 100644 --- a/src/render/dof/DepthOfFieldSetup.cpp +++ b/src/render/dof/DepthOfFieldSetup.cpp @@ -49,14 +49,12 @@ ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) } // namespace void DepthOfFieldSetup::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDsLayout) + ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDsLayout) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, - shaderDefinitionCallback, + WHEELS_MOV(scopeAlloc), shaderDefinitionCallback, ComputePassOptions{ .storageSetIndex = StorageBindingSet, .externalDsLayouts = Span{&camDsLayout, 1}, diff --git a/src/render/dof/DepthOfFieldSetup.hpp b/src/render/dof/DepthOfFieldSetup.hpp index 37b3fa66..6354e487 100644 --- a/src/render/dof/DepthOfFieldSetup.hpp +++ b/src/render/dof/DepthOfFieldSetup.hpp @@ -28,9 +28,7 @@ class DepthOfFieldSetup DepthOfFieldSetup &operator=(DepthOfFieldSetup &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDsLayout); + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDsLayout); void recompileShaders( 
wheels::ScopedScratch scopeAlloc, diff --git a/src/render/rtdi/RtDiInitialReservoirs.cpp b/src/render/rtdi/RtDiInitialReservoirs.cpp index b1e043cb..53fb8a22 100644 --- a/src/render/rtdi/RtDiInitialReservoirs.cpp +++ b/src/render/rtdi/RtDiInitialReservoirs.cpp @@ -65,13 +65,12 @@ StaticArray externalDsLayouts( } // namespace void RtDiInitialReservoirs::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts) + ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, + WHEELS_MOV(scopeAlloc), [&dsLayouts](Allocator &alloc) { return shaderDefinitionCallback(alloc, dsLayouts.world); }, ComputePassOptions{ diff --git a/src/render/rtdi/RtDiInitialReservoirs.hpp b/src/render/rtdi/RtDiInitialReservoirs.hpp index 483eaa60..c18bd41c 100644 --- a/src/render/rtdi/RtDiInitialReservoirs.hpp +++ b/src/render/rtdi/RtDiInitialReservoirs.hpp @@ -30,9 +30,7 @@ class RtDiInitialReservoirs const WorldDSLayouts &world; }; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts); + wheels::ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts); // Returns true if recompile happened bool recompileShaders( diff --git a/src/render/rtdi/RtDiSpatialReuse.cpp b/src/render/rtdi/RtDiSpatialReuse.cpp index f13f0be6..f3173f9a 100644 --- a/src/render/rtdi/RtDiSpatialReuse.cpp +++ b/src/render/rtdi/RtDiSpatialReuse.cpp @@ -65,13 +65,12 @@ StaticArray externalDsLayouts( } // namespace void RtDiSpatialReuse::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts) + ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts) { WHEELS_ASSERT(!m_initialized); m_computePass.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, + WHEELS_MOV(scopeAlloc), [&dsLayouts](Allocator &alloc) { return 
shaderDefinitionCallback(alloc, dsLayouts.world); }, ComputePassOptions{ diff --git a/src/render/rtdi/RtDiSpatialReuse.hpp b/src/render/rtdi/RtDiSpatialReuse.hpp index 8dfb4130..0a80afbd 100644 --- a/src/render/rtdi/RtDiSpatialReuse.hpp +++ b/src/render/rtdi/RtDiSpatialReuse.hpp @@ -29,9 +29,7 @@ class RtDiSpatialReuse const WorldDSLayouts &world; }; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - const InputDSLayouts &dsLayouts); + wheels::ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts); // Returns true if recompile happened bool recompileShaders( diff --git a/src/render/rtdi/RtDiTrace.cpp b/src/render/rtdi/RtDiTrace.cpp index 2059a146..73110cb8 100644 --- a/src/render/rtdi/RtDiTrace.cpp +++ b/src/render/rtdi/RtDiTrace.cpp @@ -14,7 +14,6 @@ #include "../RenderTargets.hpp" #include "../Utils.hpp" - #include using namespace wheels; @@ -99,18 +98,17 @@ RtDiTrace::~RtDiTrace() } void RtDiTrace::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts) + ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, + const WorldDSLayouts &worldDSLayouts) { WHEELS_ASSERT(!m_initialized); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); LOG_INFO("Creating RtDiTrace"); if (!compileShaders(scopeAlloc.child_scope(), worldDSLayouts)) throw std::runtime_error("RtDiTrace shader compilation failed"); - createDescriptorSets(scopeAlloc.child_scope(), staticDescriptorsAlloc); + createDescriptorSets(scopeAlloc.child_scope()); createPipeline(camDSLayout, worldDSLayouts); createShaderBindingTable(scopeAlloc.child_scope()); @@ -520,8 +518,7 @@ bool RtDiTrace::compileShaders( return false; } -void RtDiTrace::createDescriptorSets( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void RtDiTrace::createDescriptorSets(ScopedScratch scopeAlloc) { m_descriptorSetLayout = 
m_raygenReflection->createDescriptorSetLayout( WHEELS_MOV(scopeAlloc), StorageBindingSet, @@ -531,7 +528,7 @@ void RtDiTrace::createDescriptorSets( m_descriptorSetLayout}; const StaticArray debugNames{ "RtDiTrace"}; - staticDescriptorsAlloc->allocate( + gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_descriptorSets.mut_span()); } diff --git a/src/render/rtdi/RtDiTrace.hpp b/src/render/rtdi/RtDiTrace.hpp index c5fe40c8..3a0ff5ac 100644 --- a/src/render/rtdi/RtDiTrace.hpp +++ b/src/render/rtdi/RtDiTrace.hpp @@ -25,9 +25,7 @@ class RtDiTrace RtDiTrace &operator=(RtDiTrace &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts); void recompileShaders( @@ -58,9 +56,7 @@ class RtDiTrace [[nodiscard]] bool compileShaders( wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts); - void createDescriptorSets( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void createDescriptorSets(wheels::ScopedScratch scopeAlloc); void updateDescriptorSet( wheels::ScopedScratch scopeAlloc, uint32_t nextFrame, Input const &inputs, ImageHandle illumination); diff --git a/src/render/rtdi/RtDirectIllumination.cpp b/src/render/rtdi/RtDirectIllumination.cpp index d5670aff..4d055bd0 100644 --- a/src/render/rtdi/RtDirectIllumination.cpp +++ b/src/render/rtdi/RtDirectIllumination.cpp @@ -8,26 +8,22 @@ using namespace wheels; void RtDirectIllumination::init( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts) + ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, + const WorldDSLayouts &worldDSLayouts) { WHEELS_ASSERT(!m_initialized); m_initialReservoirs.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - 
RtDiInitialReservoirs::InputDSLayouts{ - .camera = camDSLayout, - .world = worldDSLayouts, - }); + scopeAlloc.child_scope(), RtDiInitialReservoirs::InputDSLayouts{ + .camera = camDSLayout, + .world = worldDSLayouts, + }); m_spatialReuse.init( - scopeAlloc.child_scope(), staticDescriptorsAlloc, - RtDiSpatialReuse::InputDSLayouts{ - .camera = camDSLayout, - .world = worldDSLayouts, - }); - m_trace.init( - WHEELS_MOV(scopeAlloc), staticDescriptorsAlloc, camDSLayout, - worldDSLayouts); + scopeAlloc.child_scope(), RtDiSpatialReuse::InputDSLayouts{ + .camera = camDSLayout, + .world = worldDSLayouts, + }); + m_trace.init(WHEELS_MOV(scopeAlloc), camDSLayout, worldDSLayouts); m_initialized = true; } diff --git a/src/render/rtdi/RtDirectIllumination.hpp b/src/render/rtdi/RtDirectIllumination.hpp index 622db4a8..524a25eb 100644 --- a/src/render/rtdi/RtDirectIllumination.hpp +++ b/src/render/rtdi/RtDirectIllumination.hpp @@ -26,9 +26,7 @@ class RtDirectIllumination RtDirectIllumination &operator=(RtDirectIllumination &&other) = delete; void init( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc, - vk::DescriptorSetLayout camDSLayout, + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts); void recompileShaders( diff --git a/src/scene/Camera.cpp b/src/scene/Camera.cpp index 9483fb68..dc7740f6 100644 --- a/src/scene/Camera.cpp +++ b/src/scene/Camera.cpp @@ -52,20 +52,17 @@ Camera::~Camera() gDevice.logical().destroy(m_descriptorSetLayout); } -void Camera::init( - wheels::ScopedScratch scopeAlloc, RingBuffer *constantsRing, - DescriptorAllocator *staticDescriptorsAlloc) +void Camera::init(wheels::ScopedScratch scopeAlloc, RingBuffer *constantsRing) { WHEELS_ASSERT(!m_initialized); WHEELS_ASSERT(constantsRing != nullptr); - WHEELS_ASSERT(staticDescriptorsAlloc != nullptr); m_constantsRing = constantsRing; LOG_INFO("Creating Camera"); createBindingsReflection(scopeAlloc.child_scope()); - 
createDescriptorSet(scopeAlloc.child_scope(), staticDescriptorsAlloc); + createDescriptorSet(scopeAlloc.child_scope()); m_initialized = true; } @@ -334,8 +331,7 @@ void Camera::createBindingsReflection(ScopedScratch scopeAlloc) m_bindingsReflection = WHEELS_MOV(*compResult); } -void Camera::createDescriptorSet( - ScopedScratch scopeAlloc, DescriptorAllocator *staticDescriptorsAlloc) +void Camera::createDescriptorSet(ScopedScratch scopeAlloc) { WHEELS_ASSERT(m_bindingsReflection.has_value()); m_descriptorSetLayout = m_bindingsReflection->createDescriptorSetLayout( @@ -346,7 +342,7 @@ void Camera::createDescriptorSet( vk::ShaderStageFlagBits::eMeshEXT); m_descriptorSet = - staticDescriptorsAlloc->allocate(m_descriptorSetLayout, "Camera"); + gStaticDescriptorsAlloc.allocate(m_descriptorSetLayout, "Camera"); const StaticArray descriptorInfos{ DescriptorInfo{vk::DescriptorBufferInfo{ diff --git a/src/scene/Camera.hpp b/src/scene/Camera.hpp index 68540ab9..eb0567f7 100644 --- a/src/scene/Camera.hpp +++ b/src/scene/Camera.hpp @@ -91,9 +91,7 @@ class Camera Camera &operator=(const Camera &other) = delete; Camera &operator=(Camera &&other) = delete; - void init( - wheels::ScopedScratch scopeAlloc, RingBuffer *constantsRing, - DescriptorAllocator *staticDescriptorsAlloc); + void init(wheels::ScopedScratch scopeAlloc, RingBuffer *constantsRing); void endFrame(); void lookAt(const CameraTransform &transform); @@ -126,9 +124,7 @@ class Camera private: void createBindingsReflection(wheels::ScopedScratch scopeAlloc); - void createDescriptorSet( - wheels::ScopedScratch scopeAlloc, - DescriptorAllocator *staticDescriptorsAlloc); + void createDescriptorSet(wheels::ScopedScratch scopeAlloc); void updateWorldToCamera(); void updateFrustumPlanes(const FrustumCorners &corners); diff --git a/src/scene/WorldData.cpp b/src/scene/WorldData.cpp index 7c6d47fb..40518376 100644 --- a/src/scene/WorldData.cpp +++ b/src/scene/WorldData.cpp @@ -294,6 +294,8 @@ WorldData::~WorldData() 
gDevice.destroy(buffer); for (const vk::Sampler sampler : m_samplers) gDevice.logical().destroy(sampler); + + m_descriptorAllocator.destroy(); } void WorldData::init( From 8a723f30fb4c4e69929d06b7d96edcd9d9229b32 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Sun, 7 Jul 2024 16:26:26 +0300 Subject: [PATCH 02/12] swapchain: Return const ref to images --- src/gfx/Swapchain.cpp | 2 +- src/gfx/Swapchain.hpp | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/gfx/Swapchain.cpp b/src/gfx/Swapchain.cpp index 6ecbb455..1090d4da 100644 --- a/src/gfx/Swapchain.cpp +++ b/src/gfx/Swapchain.cpp @@ -185,7 +185,7 @@ const vk::Extent2D &Swapchain::extent() const return m_config.extent; } -SwapchainImage Swapchain::image(size_t i) const +const SwapchainImage &Swapchain::image(size_t i) const { WHEELS_ASSERT(m_initialized); diff --git a/src/gfx/Swapchain.hpp b/src/gfx/Swapchain.hpp index 37a390a9..631a0d5f 100644 --- a/src/gfx/Swapchain.hpp +++ b/src/gfx/Swapchain.hpp @@ -63,7 +63,7 @@ class Swapchain [[nodiscard]] vk::Format format() const; [[nodiscard]] const vk::Extent2D &extent() const; [[nodiscard]] uint32_t imageCount() const; - [[nodiscard]] SwapchainImage image(size_t i) const; + [[nodiscard]] const SwapchainImage &image(size_t i) const; [[nodiscard]] size_t nextFrame() const; [[nodiscard]] vk::Fence currentFence() const; // nullopt tells to recreate swapchain From bada77449fd61b48f8f40d3952d47be1f9d7943c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Sun, 7 Jul 2024 17:04:52 +0300 Subject: [PATCH 03/12] Split off rendering code to Renderer App was getting hard to look through so let's try having the main draw stuff separately. 
--- src/App.cpp | 857 +++------------------------------ src/App.hpp | 46 +- src/render/CMakeLists.txt | 2 + src/render/DrawStats.hpp | 16 + src/render/ForwardRenderer.cpp | 16 +- src/render/ForwardRenderer.hpp | 6 +- src/render/Fwd.hpp | 6 + src/render/GBufferRenderer.cpp | 8 +- src/render/GBufferRenderer.hpp | 2 +- src/render/MeshletCuller.cpp | 16 +- src/render/MeshletCuller.hpp | 4 +- src/render/Renderer.cpp | 819 +++++++++++++++++++++++++++++++ src/render/Renderer.hpp | 110 +++++ src/utils/SceneStats.hpp | 4 - 14 files changed, 1038 insertions(+), 874 deletions(-) create mode 100644 src/render/DrawStats.hpp create mode 100644 src/render/Renderer.cpp create mode 100644 src/render/Renderer.hpp diff --git a/src/App.cpp b/src/App.cpp index 1cccdcb4..4d59ccd8 100644 --- a/src/App.cpp +++ b/src/App.cpp @@ -22,24 +22,8 @@ #include "Allocators.hpp" #include "gfx/DescriptorAllocator.hpp" #include "gfx/VkUtils.hpp" -#include "render/DebugRenderer.hpp" -#include "render/DeferredShading.hpp" -#include "render/ForwardRenderer.hpp" -#include "render/GBufferRenderer.hpp" -#include "render/ImGuiRenderer.hpp" -#include "render/ImageBasedLighting.hpp" -#include "render/LightClustering.hpp" -#include "render/MeshletCuller.hpp" #include "render/RenderResources.hpp" -#include "render/RenderTargets.hpp" -#include "render/RtReference.hpp" -#include "render/SkyboxRenderer.hpp" -#include "render/TemporalAntiAliasing.hpp" -#include "render/TextureDebug.hpp" -#include "render/TextureReadback.hpp" -#include "render/ToneMap.hpp" -#include "render/dof/DepthOfField.hpp" -#include "render/rtdi/RtDirectIllumination.hpp" +#include "render/Renderer.hpp" #include "scene/Scene.hpp" #include "scene/World.hpp" #include "utils/InputHandler.hpp" @@ -54,8 +38,6 @@ using namespace std::chrono_literals; namespace { -constexpr uint32_t sDrawStatsByteSize = 2 * sizeof(uint32_t); - StaticArray allocateCommandBuffers() { StaticArray ret; @@ -80,22 +62,7 @@ App::App(std::filesystem::path scenePath) noexcept , 
m_swapchain{OwningPtr{gAllocators.general}} , m_cam{OwningPtr{gAllocators.general}} , m_world{OwningPtr{gAllocators.general}} -, m_lightClustering{OwningPtr{gAllocators.general}} -, m_forwardRenderer{OwningPtr{gAllocators.general}} -, m_gbufferRenderer{OwningPtr{gAllocators.general}} -, m_deferredShading{OwningPtr{gAllocators.general}} -, m_rtDirectIllumination{OwningPtr{gAllocators.general}} -, m_rtReference{OwningPtr{gAllocators.general}} -, m_skyboxRenderer{OwningPtr{gAllocators.general}} -, m_debugRenderer{OwningPtr{gAllocators.general}} -, m_toneMap{OwningPtr{gAllocators.general}} -, m_imguiRenderer{OwningPtr{gAllocators.general}} -, m_textureDebug{OwningPtr{gAllocators.general}} -, m_depthOfField{OwningPtr{gAllocators.general}} -, m_imageBasedLighting{OwningPtr{gAllocators.general}} -, m_temporalAntiAliasing{OwningPtr{gAllocators.general}} -, m_meshletCuller{OwningPtr{gAllocators.general}} -, m_textureReadback{OwningPtr{gAllocators.general}} +, m_renderer{OwningPtr{gAllocators.general}} { } @@ -135,52 +102,11 @@ void App::init(ScopedScratch scopeAlloc) m_world->init(scopeAlloc.child_scope(), &m_constantsRing, m_scenePath); - const Timer gpuPassesInitTimer; - m_lightClustering->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_forwardRenderer->init( - scopeAlloc.child_scope(), - ForwardRenderer::InputDSLayouts{ - .camera = m_cam->descriptorSetLayout(), - .lightClusters = m_lightClustering->descriptorSetLayout(), - .world = m_world->dsLayouts(), - }); - m_gbufferRenderer->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_deferredShading->init( - scopeAlloc.child_scope(), - DeferredShading::InputDSLayouts{ - .camera = m_cam->descriptorSetLayout(), - .lightClusters = m_lightClustering->descriptorSetLayout(), - .world = m_world->dsLayouts(), - }); - m_rtDirectIllumination->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - 
m_rtReference->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_skyboxRenderer->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_debugRenderer->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout()); - m_toneMap->init(scopeAlloc.child_scope()); - m_imguiRenderer->init(m_swapchain->config()); - m_textureDebug->init(scopeAlloc.child_scope()); - m_depthOfField->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout()); - m_imageBasedLighting->init(scopeAlloc.child_scope()); - m_temporalAntiAliasing->init( - scopeAlloc.child_scope(), m_cam->descriptorSetLayout()); - m_meshletCuller->init( - scopeAlloc.child_scope(), m_world->dsLayouts(), - m_cam->descriptorSetLayout()); - m_textureReadback->init(scopeAlloc.child_scope()); + m_renderer->init( + scopeAlloc.child_scope(), m_swapchain->config(), + m_cam->descriptorSetLayout(), m_world->dsLayouts()); + m_recompileTime = std::chrono::file_clock::now(); - LOG_INFO("GPU pass init took %.2fs", gpuPassesInitTimer.getSeconds()); m_cam->lookAt(m_sceneCameraTransform); m_cam->setParameters(m_cameraParameters); @@ -231,12 +157,11 @@ void App::run() recompileShaders(scopeAlloc.child_scope()); - gRenderResources.startFrame(); m_constantsRing.startFrame(); + m_world->startFrame(); - m_meshletCuller->startFrame(); - m_depthOfField->startFrame(); - m_textureReadback->startFrame(); + + m_renderer->startFrame(); drawFrame( scopeAlloc.child_scope(), @@ -274,18 +199,9 @@ void App::recreateViewportRelated() // queue simultaneously gDevice.graphicsQueue().waitIdle(); - gRenderResources.destroyResources(); - - if (m_drawUi) - { - const ImVec2 viewportSize = m_imguiRenderer->centerAreaSize(); - m_viewportExtent = vk::Extent2D{ - asserted_cast(viewportSize.x), - asserted_cast(viewportSize.y), - }; - } - else - m_viewportExtent = m_swapchain->config().extent; + m_renderer->recreateViewportRelated(); + m_viewportExtent = m_drawUi ? 
m_renderer->viewportExtentInUi() + : m_swapchain->config().extent; m_cam->updateResolution( uvec2{m_viewportExtent.width, m_viewportExtent.height}); @@ -303,8 +219,6 @@ void App::recreateSwapchainAndRelated(ScopedScratch scopeAlloc) // queue simultaneously gDevice.graphicsQueue().waitIdle(); - gRenderResources.destroyResources(); - { // Drop the config as we should always use swapchain's active config const SwapchainConfig config{ scopeAlloc.child_scope(), {gWindow.width(), gWindow.height()}}; @@ -379,54 +293,9 @@ void App::recompileShaders(ScopedScratch scopeAlloc) // wait to avoid reading them mid-write. std::this_thread::sleep_for(std::chrono::milliseconds(200)); - LOG_INFO("Recompiling shaders"); - - const Timer t; - - m_lightClustering->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_forwardRenderer->recompileShaders( - scopeAlloc.child_scope(), changedFiles, - ForwardRenderer::InputDSLayouts{ - .camera = m_cam->descriptorSetLayout(), - .lightClusters = m_lightClustering->descriptorSetLayout(), - .world = m_world->dsLayouts(), - }); - m_gbufferRenderer->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_deferredShading->recompileShaders( - scopeAlloc.child_scope(), changedFiles, - DeferredShading::InputDSLayouts{ - .camera = m_cam->descriptorSetLayout(), - .lightClusters = m_lightClustering->descriptorSetLayout(), - .world = m_world->dsLayouts(), - }); - m_rtDirectIllumination->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_rtReference->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_skyboxRenderer->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout(), - m_world->dsLayouts()); - m_debugRenderer->recompileShaders( - scopeAlloc.child_scope(), 
changedFiles, m_cam->descriptorSetLayout()); - m_toneMap->recompileShaders(scopeAlloc.child_scope(), changedFiles); - m_textureDebug->recompileShaders(scopeAlloc.child_scope(), changedFiles); - m_depthOfField->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout()); - m_imageBasedLighting->recompileShaders( - scopeAlloc.child_scope(), changedFiles); - m_temporalAntiAliasing->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_cam->descriptorSetLayout()); - m_meshletCuller->recompileShaders( - scopeAlloc.child_scope(), changedFiles, m_world->dsLayouts(), - m_cam->descriptorSetLayout()); - - LOG_INFO("Shaders recompiled in %.2fs", t.getSeconds()); + m_renderer->recompileShaders( + scopeAlloc.child_scope(), m_cam->descriptorSetLayout(), + m_world->dsLayouts(), changedFiles); m_recompileTime = std::chrono::file_clock::now(); } @@ -524,9 +393,9 @@ void App::handleMouseGestures() } else if (gesture->type == MouseGestureType::SelectPoint) { - // Reference RT write a depth buffer so can't use the texture - // readback - if (m_renderDoF && !m_referenceRt && !m_waitFocusDistance) + // Reference RT doesn't write a depth buffer so can't use the + // texture readback + if (m_renderer->depthAvailable() && !m_waitFocusDistance) m_pickFocusDistance = true; } else @@ -625,17 +494,9 @@ void App::drawFrame(ScopedScratch scopeAlloc, uint32_t scopeHighWatermark) UiChanges uiChanges; if (m_drawUi) - { - m_imguiRenderer->startFrame(); - uiChanges = drawUi( scopeAlloc.child_scope(), nextFrame, profilerDatas, scopeHighWatermark); - } - // Clear stats for new frame after UI was drawn - m_sceneStats[nextFrame] = SceneStats{}; - if (gRenderResources.buffers->isValidHandle(m_drawStats[nextFrame])) - gRenderResources.buffers->release(m_drawStats[nextFrame]); const vk::Rect2D renderArea{ .offset = {0, 0}, @@ -654,7 +515,7 @@ void App::drawFrame(ScopedScratch scopeAlloc, uint32_t scopeHighWatermark) // -1 seems like a safe value here since an 8 sample 
halton sequence is // used. See A Survey of Temporal Antialiasing Techniques by Yang, Liu and // Salvi for details. - const float lodBias = m_applyTaa ? -1.f : 0.f; + const float lodBias = m_renderer->lodBias(); m_world->uploadMaterialDatas(nextFrame, lodBias); if (m_isPlaying || m_forceCamUpdate || uiChanges.timeTweaked) @@ -698,17 +559,33 @@ void App::drawFrame(ScopedScratch scopeAlloc, uint32_t scopeHighWatermark) .flags = vk::CommandBufferUsageFlagBits::eOneTimeSubmit, }); - if (m_applyIbl && !m_imageBasedLighting->isGenerated()) - m_imageBasedLighting->recordGeneration( - scopeAlloc.child_scope(), cb, *m_world, nextFrame); + // We need to build TLAS if things are animated or we can build new BLASes + if (m_renderer->rtInUse() || m_world->unbuiltBlases()) + { + PROFILER_CPU_GPU_SCOPE(cb, "BuildTLAS"); + uiChanges.rtDirty |= + m_world->buildAccelerationStructures(scopeAlloc.child_scope(), cb); + } + Renderer::Options renderOptions{ + .rtDirty = uiChanges.rtDirty, + .drawUi = m_drawUi, + }; + if (m_pickFocusDistance) + { + const Optional &gesture = gInputHandler.mouseGesture(); + WHEELS_ASSERT(gesture.has_value()); - render( - scopeAlloc.child_scope(), cb, renderArea, - RenderIndices{ - .nextFrame = nextFrame, - .nextImage = nextImage, - }, - uiChanges); + const vec2 offset = m_renderer->viewportOffsetInUi(); + m_pickedFocusPx = gesture->currentPos - offset; + renderOptions.readbackDepthPx = m_pickedFocusPx; + + m_pickFocusDistance = false; + m_waitFocusDistance = true; + } + const SwapchainImage swapImage = m_swapchain->image(nextImage); + m_renderer->render( + scopeAlloc.child_scope(), cb, *m_cam, *m_world, renderArea, swapImage, + nextFrame, renderOptions); m_newSceneDataLoaded = m_world->handleDeferredLoading(cb); @@ -718,13 +595,13 @@ void App::drawFrame(ScopedScratch scopeAlloc, uint32_t scopeHighWatermark) if (m_waitFocusDistance) { - const Optional nonLinearDepth = m_textureReadback->readback(); + const Optional nonLinearDepth = 
m_renderer->tryDepthReadback(); if (nonLinearDepth.has_value()) { // First we get the projected direction and linear depth - const ImVec2 viewportArea = m_imguiRenderer->centerAreaSize(); const vec2 uv = - (m_pickedFocusPx + 0.5f) / vec2{viewportArea.x, viewportArea.y}; + (m_pickedFocusPx + 0.5f) / + vec2{m_viewportExtent.width, m_viewportExtent.height}; const vec2 clipXy = uv * 2.f - 1.f; const vec4 projected = m_cam->clipToCamera() * vec4{clipXy, nonLinearDepth->x, 1.f}; @@ -829,7 +706,7 @@ App::UiChanges App::drawUi( drawOptions(); - drawRendererSettings(ret); + ret.rtDirty |= m_renderer->drawUi(*m_cam); drawProfiling(scopeAlloc.child_scope(), profilerDatas); @@ -863,67 +740,6 @@ void App::drawOptions() ImGui::Checkbox("Recompile shaders", &m_recompileShaders); - if (ImGui::Checkbox("Texture Debug", &m_textureDebugActive) && - !m_textureDebugActive) - gRenderResources.images->clearDebug(); - - ImGui::End(); -} - -void App::drawRendererSettings(UiChanges &uiChanges) -{ - ImGui::SetNextWindowPos(ImVec2{60.f, 235.f}, ImGuiCond_FirstUseEver); - ImGui::Begin( - "Renderer settings ", nullptr, ImGuiWindowFlags_AlwaysAutoResize); - - // TODO: Droplist for main renderer type - uiChanges.rtDirty |= - ImGui::Checkbox("Reference RT", &m_referenceRt) && m_referenceRt; - uiChanges.rtDirty |= ImGui::Checkbox("Depth of field (WIP)", &m_renderDoF); - ImGui::Checkbox("Temporal Anti-Aliasing", &m_applyTaa); - - if (!m_referenceRt) - { - ImGui::Checkbox("Deferred shading", &m_renderDeferred); - - if (m_renderDeferred) - uiChanges.rtDirty = - ImGui::Checkbox("RT direct illumination", &m_deferredRt); - } - - if (!m_applyTaa) - m_cam->setJitter(false); - else - { - if (ImGui::CollapsingHeader( - "Temporal Anti-Aliasing", ImGuiTreeNodeFlags_DefaultOpen)) - { - ImGui::Checkbox("Jitter", &m_applyJitter); - m_cam->setJitter(m_applyJitter); - m_temporalAntiAliasing->drawUi(); - } - } - - if (ImGui::CollapsingHeader("Tone Map", ImGuiTreeNodeFlags_DefaultOpen)) - m_toneMap->drawUi(); - - if 
(ImGui::CollapsingHeader("Renderer", ImGuiTreeNodeFlags_DefaultOpen)) - { - uiChanges.rtDirty |= - enumDropdown("Draw type", m_drawType, sDrawTypeNames); - if (m_referenceRt) - m_rtReference->drawUi(); - else - { - if (m_renderDeferred) - { - if (m_deferredRt) - m_rtDirectIllumination->drawUi(); - } - } - uiChanges.rtDirty |= ImGui::Checkbox("IBL", &m_applyIbl); - } - ImGui::End(); } @@ -1227,30 +1043,18 @@ bool App::drawCameraUi() return changed; } -void App::drawSceneStats(uint32_t nextFrame) const +void App::drawSceneStats(uint32_t nextFrame) { - uint32_t drawnMeshletCount = 0; - uint32_t rasterizedTriangleCount = 0; - if (gRenderResources.buffers->isValidHandle(m_drawStats[nextFrame])) - { - const uint32_t *readbackPtr = static_cast( - gRenderResources.buffers->resource(m_drawStats[nextFrame]).mapped); - WHEELS_ASSERT(readbackPtr != nullptr); - - drawnMeshletCount = readbackPtr[0]; - rasterizedTriangleCount = readbackPtr[1]; - } + const DrawStats &drawStats = m_renderer->drawStats(nextFrame); ImGui::SetNextWindowPos(ImVec2{60.f, 60.f}, ImGuiCond_FirstUseEver); ImGui::Begin("Scene stats", nullptr, ImGuiWindowFlags_AlwaysAutoResize); - ImGui::Text( - "Total triangles: %u", m_sceneStats[nextFrame].totalTriangleCount); - ImGui::Text("Rasterized triangles: %u", rasterizedTriangleCount); - ImGui::Text( - "Total meshlets: %u", m_sceneStats[nextFrame].totalMeshletCount); - ImGui::Text("Drawn meshlets: %u", drawnMeshletCount); - ImGui::Text("Total meshes: %u", m_sceneStats[nextFrame].totalMeshCount); + ImGui::Text("Total triangles: %u", drawStats.totalTriangleCount); + ImGui::Text("Rasterized triangles: %u", drawStats.rasterizedTriangleCount); + ImGui::Text("Total meshlets: %u", drawStats.totalMeshletCount); + ImGui::Text("Drawn meshlets: %u", drawStats.drawnMeshletCount); + ImGui::Text("Total meshes: %u", drawStats.totalMeshCount); ImGui::Text("Total nodes: %u", m_sceneStats[nextFrame].totalNodeCount); ImGui::Text( "Animated nodes: %u", 
m_sceneStats[nextFrame].animatedNodeCount); @@ -1330,550 +1134,6 @@ void App::updateDebugLines(const Scene &scene, uint32_t nextFrame) } } -void App::render( - ScopedScratch scopeAlloc, vk::CommandBuffer cb, - const vk::Rect2D &renderArea, const RenderIndices &indices, - const UiChanges &uiChanges) -{ - bool blasesAdded = false; - if (m_referenceRt || m_deferredRt || m_world->unbuiltBlases()) - { - PROFILER_CPU_GPU_SCOPE(cb, "BuildTLAS"); - blasesAdded = - m_world->buildAccelerationStructures(scopeAlloc.child_scope(), cb); - } - - const LightClusteringOutput lightClusters = m_lightClustering->record( - scopeAlloc.child_scope(), cb, *m_world, *m_cam, m_viewportExtent, - indices.nextFrame); - - ImageHandle illumination; - const BufferHandle drawStats = gRenderResources.buffers->create( - BufferDescription{ - .byteSize = sDrawStatsByteSize, - .usage = vk::BufferUsageFlagBits::eTransferDst | - vk::BufferUsageFlagBits::eTransferSrc | - vk::BufferUsageFlagBits::eStorageBuffer, - .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, - }, - "DrawStats"); - - gRenderResources.buffers->transition( - cb, drawStats, BufferState::TransferDst); - cb.fillBuffer( - gRenderResources.buffers->nativeHandle(drawStats), 0, - sDrawStatsByteSize, 0); - - if (m_referenceRt) - { - m_rtDirectIllumination->releasePreserved(); - m_temporalAntiAliasing->releasePreserved(); - - illumination = - m_rtReference - ->record( - scopeAlloc.child_scope(), cb, *m_world, *m_cam, renderArea, - RtReference::Options{ - .depthOfField = m_renderDoF, - .ibl = m_applyIbl, - .colorDirty = uiChanges.rtDirty || blasesAdded, - .drawType = m_drawType, - }, - indices.nextFrame) - .illumination; - } - else - { - // Need to clean up after toggling rt off to not "leak" the resources - m_rtReference->releasePreserved(); - - ImageHandle velocity; - ImageHandle depth; - // Opaque - if (m_renderDeferred) - { - const GBufferRendererOutput gbuffer = m_gbufferRenderer->record( - scopeAlloc.child_scope(), cb, 
m_meshletCuller.get(), *m_world, - *m_cam, renderArea, drawStats, m_drawType, indices.nextFrame, - &m_sceneStats[indices.nextFrame]); - - if (m_deferredRt) - illumination = - m_rtDirectIllumination - ->record( - scopeAlloc.child_scope(), cb, *m_world, *m_cam, - gbuffer, uiChanges.rtDirty || blasesAdded, - m_drawType, indices.nextFrame) - .illumination; - else - { - m_rtDirectIllumination->releasePreserved(); - - illumination = - m_deferredShading - ->record( - scopeAlloc.child_scope(), cb, *m_world, *m_cam, - DeferredShading::Input{ - .gbuffer = gbuffer, - .lightClusters = lightClusters, - }, - indices.nextFrame, m_applyIbl, m_drawType) - .illumination; - } - - gRenderResources.images->release(gbuffer.albedoRoughness); - gRenderResources.images->release(gbuffer.normalMetalness); - - velocity = gbuffer.velocity; - depth = gbuffer.depth; - } - else - { - m_rtDirectIllumination->releasePreserved(); - - const ForwardRenderer::OpaqueOutput output = - m_forwardRenderer->recordOpaque( - scopeAlloc.child_scope(), cb, m_meshletCuller.get(), - *m_world, *m_cam, renderArea, lightClusters, drawStats, - indices.nextFrame, m_applyIbl, m_drawType, - &m_sceneStats[indices.nextFrame]); - illumination = output.illumination; - velocity = output.velocity; - depth = output.depth; - } - - m_skyboxRenderer->record( - scopeAlloc.child_scope(), cb, *m_world, *m_cam, - SkyboxRenderer::RecordInOut{ - .illumination = illumination, - .velocity = velocity, - .depth = depth, - }); - - // Transparent - m_forwardRenderer->recordTransparent( - scopeAlloc.child_scope(), cb, m_meshletCuller.get(), *m_world, - *m_cam, - ForwardRenderer::TransparentInOut{ - .illumination = illumination, - .depth = depth, - }, - lightClusters, drawStats, indices.nextFrame, m_drawType, - &m_sceneStats[indices.nextFrame]); - - m_debugRenderer->record( - scopeAlloc.child_scope(), cb, *m_cam, - DebugRenderer::RecordInOut{ - .color = illumination, - .depth = depth, - }, - indices.nextFrame); - - if (m_pickFocusDistance) - { 
- const Optional &gesture = - gInputHandler.mouseGesture(); - WHEELS_ASSERT(gesture.has_value()); - - const ImVec2 offset = m_imguiRenderer->centerAreaOffset(); - const vec2 px = gesture->currentPos - vec2{offset.x, offset.y}; - - m_textureReadback->record( - scopeAlloc.child_scope(), cb, depth, px, indices.nextFrame); - - m_pickFocusDistance = false; - m_pickedFocusPx = px; - m_waitFocusDistance = true; - } - - if (m_applyTaa) - { - const TemporalAntiAliasing::Output taaOutput = - m_temporalAntiAliasing->record( - scopeAlloc.child_scope(), cb, *m_cam, - TemporalAntiAliasing::Input{ - .illumination = illumination, - .velocity = velocity, - .depth = depth, - }, - indices.nextFrame); - - gRenderResources.images->release(illumination); - illumination = taaOutput.resolvedIllumination; - } - else - m_temporalAntiAliasing->releasePreserved(); - - // TODO: - // Do DoF on raw illumination and have a separate stabilizing TAA pass - // that doesn't blend foreground/background (Karis/Abadie). - if (m_renderDoF) - { - const DepthOfField::Output dofOutput = m_depthOfField->record( - scopeAlloc.child_scope(), cb, *m_cam, - DepthOfField::Input{ - .illumination = illumination, - .depth = depth, - }, - indices.nextFrame); - - gRenderResources.images->release(illumination); - illumination = dofOutput.combinedIlluminationDoF; - } - - gRenderResources.images->release(velocity); - gRenderResources.images->release(depth); - } - gRenderResources.images->release(lightClusters.pointers); - gRenderResources.texelBuffers->release(lightClusters.indicesCount); - gRenderResources.texelBuffers->release(lightClusters.indices); - - const ImageHandle toneMapped = - m_toneMap - ->record( - scopeAlloc.child_scope(), cb, illumination, indices.nextFrame) - .toneMapped; - - gRenderResources.images->release(illumination); - - ImageHandle finalComposite; - if (m_textureDebugActive) - { - const ImVec2 size = m_imguiRenderer->centerAreaSize(); - const ImVec2 offset = m_imguiRenderer->centerAreaOffset(); - 
const CursorState cursor = gInputHandler.cursor(); - - // Have magnifier when mouse is on (an active) debug view - const bool uiHovered = ImGui::IsAnyItemHovered(); - const bool activeTexture = m_textureDebug->textureSelected(); - const bool cursorWithinArea = - all(greaterThan(cursor.position, vec2(offset.x, offset.y))) && - all(lessThan( - cursor.position, vec2(offset.x + size.x, offset.y + size.y))); - - Optional cursorCoord; - // Don't have debug magnifier when using ui that overlaps the render - // area - if (!uiHovered && activeTexture && cursorWithinArea) - { - // Also don't have magnifier when e.g. mouse look is active. Let - // InputHandler figure out if mouse should be visible or not. - if (!gInputHandler.mouseGesture().has_value()) - { - // The magnifier has its own pointer so let's not mask the view - // with the OS one. - gInputHandler.hideCursor(); - cursorCoord = cursor.position - vec2(offset.x, offset.y); - } - } - else - gInputHandler.showCursor(); - - const ImageHandle debugOutput = m_textureDebug->record( - scopeAlloc.child_scope(), cb, renderArea.extent, cursorCoord, - indices.nextFrame); - - finalComposite = blitColorToFinalComposite( - scopeAlloc.child_scope(), cb, debugOutput); - - gRenderResources.images->release(debugOutput); - } - else - finalComposite = - blitColorToFinalComposite(scopeAlloc.child_scope(), cb, toneMapped); - - gRenderResources.images->release(toneMapped); - - if (m_drawUi) - { - m_world->drawDeferredLoadingUi(); - - if (m_textureDebugActive) - // Draw this after so that the first frame debug is active for a new - // texture, we draw black instead of a potentially wrong output from - // the shared texture that wasn't protected yet - m_textureDebug->drawUi(indices.nextFrame); - - const vk::Rect2D backbufferArea{ - .offset = {0, 0}, - .extent = m_swapchain->config().extent, - }; - m_imguiRenderer->endFrame(cb, backbufferArea, finalComposite); - } - - blitFinalComposite(cb, finalComposite, indices.nextImage); - - 
gRenderResources.images->release(finalComposite); - - readbackDrawStats(cb, indices.nextFrame, drawStats); - - gRenderResources.buffers->release(drawStats); - - // Need to preserve both the new and old readback buffers. Release happens - // after the readback is read from when nextFrame wraps around. - for (const BufferHandle buffer : m_drawStats) - { - if (gRenderResources.buffers->isValidHandle(buffer)) - gRenderResources.buffers->preserve(buffer); - } -} - -ImageHandle App::blitColorToFinalComposite( - ScopedScratch scopeAlloc, vk::CommandBuffer cb, ImageHandle toneMapped) -{ - const SwapchainConfig &swapConfig = m_swapchain->config(); - const ImageHandle finalComposite = gRenderResources.images->create( - ImageDescription{ - .format = sFinalCompositeFormat, - .width = swapConfig.extent.width, - .height = swapConfig.extent.height, - .usageFlags = - vk::ImageUsageFlagBits::eColorAttachment | // Render - vk::ImageUsageFlagBits::eTransferDst | // Blit from tone - // mapped - vk::ImageUsageFlagBits::eTransferSrc, // Blit to swap image - }, - "finalComposite"); - - // Blit tonemapped into cleared final composite before drawing ui on top - transition( - WHEELS_MOV(scopeAlloc), cb, - Transitions{ - .images = StaticArray{{ - {toneMapped, ImageState::TransferSrc}, - {finalComposite, ImageState::TransferDst}, - }}, - }); - - // This scope has a barrier, but that's intentional as it should contain - // both the clear and the blit - PROFILER_CPU_GPU_SCOPE(cb, "blitColorToFinalComposite"); - - const vk::ClearColorValue clearColor{0.f, 0.f, 0.f, 0.f}; - const vk::ImageSubresourceRange subresourceRange{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .baseMipLevel = 0, - .levelCount = 1, - .baseArrayLayer = 0, - .layerCount = 1, - }; - cb.clearColorImage( - gRenderResources.images->nativeHandle(finalComposite), - vk::ImageLayout::eTransferDstOptimal, &clearColor, 1, - &subresourceRange); - - // Memory barrier for finalComposite, layout is already correct - 
cb.pipelineBarrier( - vk::PipelineStageFlagBits::eTransfer, - vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags{}, - { - vk::MemoryBarrier{ - .srcAccessMask = vk::AccessFlagBits::eTransferWrite, - .dstAccessMask = vk::AccessFlagBits::eTransferWrite, - }, - }, - {}, {}); - - const vk::ImageSubresourceLayers layers{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}; - - const std::array srcOffsets{ - vk::Offset3D{0, 0, 0}, - vk::Offset3D{ - asserted_cast(m_viewportExtent.width), - asserted_cast(m_viewportExtent.height), - 1, - }, - }; - - const vk::Extent2D backbufferExtent = m_swapchain->config().extent; - ivec2 dstOffset; - ivec2 dstSize; - if (m_drawUi) - { - const ImVec2 offset = m_imguiRenderer->centerAreaOffset(); - const ImVec2 size = m_imguiRenderer->centerAreaSize(); - dstOffset = ivec2{static_cast(offset.x), offset.y}; - dstSize = ivec2{size.x, size.y}; - } - else - { - dstOffset = ivec2{0, 0}; - dstSize = ivec2{ - asserted_cast(backbufferExtent.width), - asserted_cast(backbufferExtent.height), - }; - } - - const std::array dstOffsets{ - vk::Offset3D{ - std::min( - dstOffset.x, - asserted_cast(backbufferExtent.width - 1)), - std::min( - dstOffset.y, - asserted_cast(backbufferExtent.height - 1)), - 0, - }, - vk::Offset3D{ - std::min( - asserted_cast(dstOffset.x + dstSize.x), - asserted_cast(backbufferExtent.width)), - std::min( - asserted_cast(dstOffset.y + dstSize.y), - asserted_cast(backbufferExtent.height)), - 1, - }, - }; - const vk::ImageBlit blit = { - .srcSubresource = layers, - .srcOffsets = srcOffsets, - .dstSubresource = layers, - .dstOffsets = dstOffsets, - }; - cb.blitImage( - gRenderResources.images->nativeHandle(toneMapped), - vk::ImageLayout::eTransferSrcOptimal, - gRenderResources.images->nativeHandle(finalComposite), - vk::ImageLayout::eTransferDstOptimal, 1, &blit, vk::Filter::eLinear); - - return finalComposite; -} - -void App::blitFinalComposite( - vk::CommandBuffer cb, 
ImageHandle finalComposite, uint32_t nextImage) -{ - // Blit to support different internal rendering resolution (and color - // format?) the future - - const auto &swapImage = m_swapchain->image(nextImage); - - const StaticArray barriers{{ - *gRenderResources.images->transitionBarrier( - finalComposite, ImageState::TransferSrc, true), - vk::ImageMemoryBarrier2{ - // TODO: - // What's the tight stage for this? Synchronization validation - // complained about a hazard after color attachment write which - // seems like an oddly specific stage for present source access to - // happen in. - .srcStageMask = vk::PipelineStageFlagBits2::eBottomOfPipe, - .srcAccessMask = vk::AccessFlags2{}, - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, - .oldLayout = vk::ImageLayout::eUndefined, - .newLayout = vk::ImageLayout::eTransferDstOptimal, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = swapImage.handle, - .subresourceRange = swapImage.subresourceRange, - }, - }}; - - cb.pipelineBarrier2(vk::DependencyInfo{ - .imageMemoryBarrierCount = asserted_cast(barriers.size()), - .pImageMemoryBarriers = barriers.data(), - }); - - { - PROFILER_CPU_GPU_SCOPE(cb, "BlitFinalComposite"); - - const vk::ImageSubresourceLayers layers{ - .aspectMask = vk::ImageAspectFlagBits::eColor, - .mipLevel = 0, - .baseArrayLayer = 0, - .layerCount = 1}; - - const vk::Extent3D &finalCompositeExtent = - gRenderResources.images->resource(finalComposite).extent; - WHEELS_ASSERT(finalCompositeExtent.width == swapImage.extent.width); - WHEELS_ASSERT(finalCompositeExtent.height == swapImage.extent.height); - const std::array offsets{ - vk::Offset3D{0, 0, 0}, - vk::Offset3D{ - asserted_cast(m_swapchain->config().extent.width), - asserted_cast(m_swapchain->config().extent.height), - 1, - }, - }; - const auto blit = vk::ImageBlit{ - .srcSubresource = layers, - .srcOffsets = offsets, - 
.dstSubresource = layers, - .dstOffsets = offsets, - }; - cb.blitImage( - gRenderResources.images->nativeHandle(finalComposite), - vk::ImageLayout::eTransferSrcOptimal, swapImage.handle, - vk::ImageLayout::eTransferDstOptimal, 1, &blit, - vk::Filter::eLinear); - } - - { - const vk::ImageMemoryBarrier2 barrier{ - .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, - .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, - // TODO: - // What's the tight stage and correct access for this? - .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, - .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, - .oldLayout = vk::ImageLayout::eTransferDstOptimal, - .newLayout = vk::ImageLayout::ePresentSrcKHR, - .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, - .image = swapImage.handle, - .subresourceRange = swapImage.subresourceRange, - }; - - cb.pipelineBarrier2(vk::DependencyInfo{ - .imageMemoryBarrierCount = 1, - .pImageMemoryBarriers = &barrier, - }); - } -} - -void App::readbackDrawStats( - vk::CommandBuffer cb, uint32_t nextFrame, BufferHandle srcBuffer) -{ - BufferHandle &dstBuffer = m_drawStats[nextFrame]; - WHEELS_ASSERT(!gRenderResources.buffers->isValidHandle(dstBuffer)); - dstBuffer = gRenderResources.buffers->create( - BufferDescription{ - .byteSize = sDrawStatsByteSize, - .usage = vk::BufferUsageFlagBits::eTransferDst, - .properties = vk::MemoryPropertyFlagBits::eHostVisible | - vk::MemoryPropertyFlagBits::eHostCoherent, - }, - "DrawStatsReadback"); - WHEELS_ASSERT( - gRenderResources.buffers->resource(srcBuffer).byteSize == - gRenderResources.buffers->resource(dstBuffer).byteSize); - - const StaticArray barriers{{ - *gRenderResources.buffers->transitionBarrier( - srcBuffer, BufferState::TransferSrc, true), - *gRenderResources.buffers->transitionBarrier( - dstBuffer, BufferState::TransferDst, true), - }}; - - cb.pipelineBarrier2(vk::DependencyInfo{ - .bufferMemoryBarrierCount = asserted_cast(barriers.size()), - 
.pBufferMemoryBarriers = barriers.data(), - }); - - const vk::BufferCopy region{ - .srcOffset = 0, - .dstOffset = 0, - .size = sDrawStatsByteSize, - }; - cb.copyBuffer( - gRenderResources.buffers->nativeHandle(srcBuffer), - gRenderResources.buffers->nativeHandle(dstBuffer), 1, ®ion); -} - bool App::submitAndPresent(vk::CommandBuffer cb, uint32_t nextFrame) { const StaticArray waitSemaphores{m_imageAvailableSemaphores[nextFrame]}; @@ -1901,10 +1161,7 @@ bool App::submitAndPresent(vk::CommandBuffer cb, uint32_t nextFrame) void App::handleResizes(ScopedScratch scopeAlloc, bool shouldResizeSwapchain) { - const ImVec2 viewportSize = m_imguiRenderer->centerAreaSize(); - const bool viewportResized = - asserted_cast(viewportSize.x) != m_viewportExtent.width || - asserted_cast(viewportSize.y) != m_viewportExtent.height; + const bool viewportResized = m_renderer->viewportResized(); // Recreate swapchain if so indicated and explicitly handle resizes if (shouldResizeSwapchain || gWindow.resized()) diff --git a/src/App.hpp b/src/App.hpp index 87e7782c..86e75a33 100644 --- a/src/App.hpp +++ b/src/App.hpp @@ -70,7 +70,6 @@ class App const wheels::Array &profilerDatas, uint32_t scopeHighWatermark); void drawOptions(); - void drawRendererSettings(UiChanges &uiChanges); void drawProfiling( wheels::ScopedScratch scopeAlloc, const wheels::Array &profilerDatas); @@ -79,26 +78,10 @@ class App bool drawTimeline(); // Returns true if settings changed bool drawCameraUi(); - void drawSceneStats(uint32_t nextFrame) const; + void drawSceneStats(uint32_t nextFrame); void updateDebugLines(const Scene &scene, uint32_t nextFrame); - struct RenderIndices - { - uint32_t nextFrame{0xFFFF'FFFF}; - uint32_t nextImage{0xFFFF'FFFF}; - }; - void render( - wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, - const vk::Rect2D &renderArea, const RenderIndices &indices, - const UiChanges &uiChanges); - [[nodiscard]] ImageHandle blitColorToFinalComposite( - wheels::ScopedScratch scopeAlloc, 
vk::CommandBuffer cb, - ImageHandle toneMapped); - void blitFinalComposite( - vk::CommandBuffer cb, ImageHandle finalComposite, uint32_t nextImage); - void readbackDrawStats( - vk::CommandBuffer cb, uint32_t nextFrame, BufferHandle srcBuffer); // Returns true if present succeeded, false if swapchain should be recreated [[nodiscard]] bool submitAndPresent( vk::CommandBuffer cb, uint32_t nextFrame); @@ -122,40 +105,16 @@ class App wheels::OwningPtr m_cam; wheels::OwningPtr m_world; - wheels::OwningPtr m_lightClustering; - wheels::OwningPtr m_forwardRenderer; - wheels::OwningPtr m_gbufferRenderer; - wheels::OwningPtr m_deferredShading; - wheels::OwningPtr m_rtDirectIllumination; - wheels::OwningPtr m_rtReference; - wheels::OwningPtr m_skyboxRenderer; - wheels::OwningPtr m_debugRenderer; - wheels::OwningPtr m_toneMap; - wheels::OwningPtr m_imguiRenderer; - wheels::OwningPtr m_textureDebug; - wheels::OwningPtr m_depthOfField; - wheels::OwningPtr m_imageBasedLighting; - wheels::OwningPtr m_temporalAntiAliasing; - wheels::OwningPtr m_meshletCuller; - wheels::OwningPtr m_textureReadback; + wheels::OwningPtr m_renderer; bool m_useFpsLimit{true}; int32_t m_fpsLimit{140}; bool m_recompileShaders{false}; - bool m_referenceRt{false}; - bool m_renderDeferred{true}; - bool m_deferredRt{false}; - bool m_renderDoF{false}; - bool m_textureDebugActive{false}; bool m_drawUi{true}; bool m_forceViewportRecreate{false}; bool m_forceCamUpdate{true}; - bool m_applyIbl{false}; bool m_sceneChanged{false}; bool m_newSceneDataLoaded{false}; - bool m_applyTaa{true}; - bool m_applyJitter{true}; - DrawType m_drawType{DrawType::Default}; bool m_camFreeLook{false}; CameraTransform m_sceneCameraTransform; @@ -167,7 +126,6 @@ class App bool m_waitFocusDistance{false}; wheels::StaticArray m_sceneStats; - wheels::StaticArray m_drawStats; std::chrono::high_resolution_clock::time_point m_lastTimeChange; float m_timeOffsetS{0.f}; diff --git a/src/render/CMakeLists.txt b/src/render/CMakeLists.txt index 
efd47ce4..8bb526d8 100644 --- a/src/render/CMakeLists.txt +++ b/src/render/CMakeLists.txt @@ -13,6 +13,7 @@ set(PROSPER_RENDER_INCLUDES ${CMAKE_CURRENT_LIST_DIR}/ImGuiRenderer.hpp ${CMAKE_CURRENT_LIST_DIR}/LightClustering.hpp ${CMAKE_CURRENT_LIST_DIR}/MeshletCuller.hpp + ${CMAKE_CURRENT_LIST_DIR}/Renderer.hpp ${CMAKE_CURRENT_LIST_DIR}/RenderImageCollection.hpp ${CMAKE_CURRENT_LIST_DIR}/RenderResourceCollection.hpp ${CMAKE_CURRENT_LIST_DIR}/RenderResourceHandle.hpp @@ -40,6 +41,7 @@ set(PROSPER_RENDER_SOURCES ${CMAKE_CURRENT_LIST_DIR}/ImGuiRenderer.cpp ${CMAKE_CURRENT_LIST_DIR}/LightClustering.cpp ${CMAKE_CURRENT_LIST_DIR}/MeshletCuller.cpp + ${CMAKE_CURRENT_LIST_DIR}/Renderer.cpp ${CMAKE_CURRENT_LIST_DIR}/RenderImageCollection.cpp ${CMAKE_CURRENT_LIST_DIR}/RenderResources.cpp ${CMAKE_CURRENT_LIST_DIR}/RenderTargets.cpp diff --git a/src/render/DrawStats.hpp b/src/render/DrawStats.hpp new file mode 100644 index 00000000..295c9aed --- /dev/null +++ b/src/render/DrawStats.hpp @@ -0,0 +1,16 @@ +#ifndef PROSPER_RENDER_DRAW_STATS_HPP +#define PROSPER_RENDER_DRAW_STATS_HPP + +#include + +struct DrawStats +{ + uint32_t drawnMeshletCount{0}; + uint32_t rasterizedTriangleCount{0}; + uint32_t totalTriangleCount{0}; + uint32_t totalMeshletCount{0}; + uint32_t totalMeshCount{0}; + uint32_t totalModelCount{0}; +}; + +#endif // PROSPER_RENDER_DRAW_STATS_HPP diff --git a/src/render/ForwardRenderer.cpp b/src/render/ForwardRenderer.cpp index 5de5dc6a..38a18784 100644 --- a/src/render/ForwardRenderer.cpp +++ b/src/render/ForwardRenderer.cpp @@ -13,8 +13,8 @@ #include "../scene/WorldRenderStructs.hpp" #include "../utils/Logger.hpp" #include "../utils/Profiler.hpp" -#include "../utils/SceneStats.hpp" #include "../utils/Utils.hpp" +#include "DrawStats.hpp" #include "LightClustering.hpp" #include "MeshletCuller.hpp" #include "RenderResources.hpp" @@ -103,7 +103,7 @@ ForwardRenderer::OpaqueOutput ForwardRenderer::recordOpaque( MeshletCuller *meshletCuller, const World &world, const Camera 
&cam, const vk::Rect2D &renderArea, const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, uint32_t nextFrame, bool applyIbl, - DrawType drawType, SceneStats *sceneStats) + DrawType drawType, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); @@ -124,7 +124,7 @@ ForwardRenderer::OpaqueOutput ForwardRenderer::recordOpaque( .ibl = applyIbl, .drawType = drawType, }, - sceneStats, "OpaqueGeometry"); + drawStats, "OpaqueGeometry"); return ret; } @@ -134,7 +134,7 @@ void ForwardRenderer::recordTransparent( MeshletCuller *meshletCuller, const World &world, const Camera &cam, const TransparentInOut &inOutTargets, const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, - uint32_t nextFrame, DrawType drawType, SceneStats *sceneStats) + uint32_t nextFrame, DrawType drawType, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); @@ -149,7 +149,7 @@ void ForwardRenderer::recordTransparent( .transparents = true, .drawType = drawType, }, - sceneStats, "TransparentGeometry"); + drawStats, "TransparentGeometry"); } bool ForwardRenderer::compileShaders( @@ -385,10 +385,10 @@ void ForwardRenderer::record( MeshletCuller *meshletCuller, const World &world, const Camera &cam, const uint32_t nextFrame, const RecordInOut &inOutTargets, const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, - const Options &options, SceneStats *sceneStats, const char *debugName) + const Options &options, DrawStats *drawStats, const char *debugName) { WHEELS_ASSERT(meshletCuller != nullptr); - WHEELS_ASSERT(sceneStats != nullptr); + WHEELS_ASSERT(drawStats != nullptr); PROFILER_CPU_SCOPE(debugName); @@ -403,7 +403,7 @@ void ForwardRenderer::record( options.transparents ? 
"Transparent" : "Opaque"; const MeshletCullerOutput cullerOutput = meshletCuller->record( scopeAlloc.child_scope(), cb, cullerMode, world, cam, nextFrame, - cullerDebugPrefix, sceneStats); + cullerDebugPrefix, drawStats); updateDescriptorSet( scopeAlloc.child_scope(), nextFrame, options.transparents, cullerOutput, diff --git a/src/render/ForwardRenderer.hpp b/src/render/ForwardRenderer.hpp index 44e3dbe2..5202b950 100644 --- a/src/render/ForwardRenderer.hpp +++ b/src/render/ForwardRenderer.hpp @@ -50,7 +50,7 @@ class ForwardRenderer const vk::Rect2D &renderArea, const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, uint32_t nextFrame, bool applyIbl, DrawType drawType, - SceneStats *sceneStats); + DrawStats *drawStats); struct TransparentInOut { @@ -62,7 +62,7 @@ class ForwardRenderer MeshletCuller *meshletCuller, const World &world, const Camera &cam, const TransparentInOut &inOutTargets, const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, - uint32_t nextFrame, DrawType drawType, SceneStats *sceneStats); + uint32_t nextFrame, DrawType drawType, DrawStats *drawStats); private: [[nodiscard]] bool compileShaders( @@ -94,7 +94,7 @@ class ForwardRenderer MeshletCuller *meshletCuller, const World &world, const Camera &cam, uint32_t nextFrame, const RecordInOut &inOutTargets, const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, - const Options &options, SceneStats *sceneStats, const char *debugName); + const Options &options, DrawStats *drawStats, const char *debugName); struct Attachments { diff --git a/src/render/Fwd.hpp b/src/render/Fwd.hpp index 28a3281b..ba602e17 100644 --- a/src/render/Fwd.hpp +++ b/src/render/Fwd.hpp @@ -10,6 +10,9 @@ class ComputePass; // DebugRenderer.hpp class DebugRenderer; +// DrawStats.hpp +struct DrawStats; + // DeferredShading.hpp class DeferredShading; @@ -37,6 +40,9 @@ struct LightClusteringOutput; struct MeshletCullerOutput; class MeshletCuller; +// Renderer.hpp +class Renderer; 
+ // RenderResources.hpp class RenderResources; diff --git a/src/render/GBufferRenderer.cpp b/src/render/GBufferRenderer.cpp index 9b099ab7..af0ea2a8 100644 --- a/src/render/GBufferRenderer.cpp +++ b/src/render/GBufferRenderer.cpp @@ -12,8 +12,8 @@ #include "../scene/WorldRenderStructs.hpp" #include "../utils/Logger.hpp" #include "../utils/Profiler.hpp" -#include "../utils/SceneStats.hpp" #include "../utils/Utils.hpp" +#include "DrawStats.hpp" #include "LightClustering.hpp" #include "MeshletCuller.hpp" #include "RenderResources.hpp" @@ -107,11 +107,11 @@ GBufferRendererOutput GBufferRenderer::record( ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, const vk::Rect2D &renderArea, BufferHandle inOutDrawStats, - DrawType drawType, const uint32_t nextFrame, SceneStats *sceneStats) + DrawType drawType, const uint32_t nextFrame, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); WHEELS_ASSERT(meshletCuller != nullptr); - WHEELS_ASSERT(sceneStats != nullptr); + WHEELS_ASSERT(drawStats != nullptr); PROFILER_CPU_SCOPE("GBuffer"); @@ -146,7 +146,7 @@ GBufferRendererOutput GBufferRenderer::record( const MeshletCullerOutput cullerOutput = meshletCuller->record( scopeAlloc.child_scope(), cb, MeshletCuller::Mode::Opaque, world, - cam, nextFrame, "GBuffer", sceneStats); + cam, nextFrame, "GBuffer", drawStats); updateDescriptorSet( scopeAlloc.child_scope(), nextFrame, cullerOutput, inOutDrawStats); diff --git a/src/render/GBufferRenderer.hpp b/src/render/GBufferRenderer.hpp index e447aa27..98ec19e6 100644 --- a/src/render/GBufferRenderer.hpp +++ b/src/render/GBufferRenderer.hpp @@ -45,7 +45,7 @@ class GBufferRenderer wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, const vk::Rect2D &renderArea, BufferHandle inOutDrawStats, - DrawType drawType, uint32_t nextFrame, SceneStats *sceneStats); + DrawType drawType, uint32_t nextFrame, DrawStats 
*drawStats); private: [[nodiscard]] bool compileShaders( diff --git a/src/render/MeshletCuller.cpp b/src/render/MeshletCuller.cpp index ead54407..1b15a7eb 100644 --- a/src/render/MeshletCuller.cpp +++ b/src/render/MeshletCuller.cpp @@ -9,7 +9,7 @@ #include "../scene/World.hpp" #include "../scene/WorldRenderStructs.hpp" #include "../utils/Profiler.hpp" -#include "../utils/SceneStats.hpp" +#include "DrawStats.hpp" #include "RenderResources.hpp" using namespace glm; @@ -201,7 +201,7 @@ void MeshletCuller::startFrame() MeshletCullerOutput MeshletCuller::record( ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, const Camera &cam, uint32_t nextFrame, - const char *debugPrefix, SceneStats *sceneStats) + const char *debugPrefix, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); @@ -213,7 +213,7 @@ MeshletCullerOutput MeshletCuller::record( const BufferHandle initialList = recordGenerateList( scopeAlloc.child_scope(), cb, mode, world, nextFrame, debugPrefix, - sceneStats); + drawStats); const BufferHandle cullerArgs = recordWriteCullerArgs( scopeAlloc.child_scope(), cb, nextFrame, initialList, debugPrefix); @@ -235,7 +235,7 @@ MeshletCullerOutput MeshletCuller::record( BufferHandle MeshletCuller::recordGenerateList( ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, uint32_t nextFrame, const char *debugPrefix, - SceneStats *sceneStats) + DrawStats *drawStats) { uint32_t meshletCountUpperBound = 0; { @@ -262,13 +262,13 @@ BufferHandle MeshletCuller::recordGenerateList( if (shouldDraw) { - sceneStats->totalMeshCount++; - sceneStats->totalTriangleCount += info.indexCount / 3; - sceneStats->totalMeshletCount += info.meshletCount; + drawStats->totalMeshCount++; + drawStats->totalTriangleCount += info.indexCount / 3; + drawStats->totalMeshletCount += info.meshletCount; meshletCountUpperBound += info.meshletCount; if (!modelDrawn) { - sceneStats->totalModelCount++; + drawStats->totalModelCount++; modelDrawn = true; } } 
diff --git a/src/render/MeshletCuller.hpp b/src/render/MeshletCuller.hpp index b27f5a9b..dce5cc24 100644 --- a/src/render/MeshletCuller.hpp +++ b/src/render/MeshletCuller.hpp @@ -52,13 +52,13 @@ class MeshletCuller [[nodiscard]] MeshletCullerOutput record( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, const Camera &cam, uint32_t nextFrame, - const char *debugPrefix, SceneStats *sceneStats); + const char *debugPrefix, DrawStats *drawStats); private: [[nodiscard]] BufferHandle recordGenerateList( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, uint32_t nextFrame, const char *debugPrefix, - SceneStats *sceneStats); + DrawStats *drawStats); [[nodiscard]] BufferHandle recordWriteCullerArgs( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, diff --git a/src/render/Renderer.cpp b/src/render/Renderer.cpp new file mode 100644 index 00000000..4e6c777b --- /dev/null +++ b/src/render/Renderer.cpp @@ -0,0 +1,819 @@ +#include "Renderer.hpp" + +#include "../Allocators.hpp" +#include "../gfx/Swapchain.hpp" +#include "../scene/Camera.hpp" +#include "../scene/World.hpp" +#include "../utils/InputHandler.hpp" +#include "../utils/Logger.hpp" +#include "../utils/Profiler.hpp" +#include "../utils/Timer.hpp" +#include "../utils/Ui.hpp" +#include "DebugRenderer.hpp" +#include "DeferredShading.hpp" +#include "ForwardRenderer.hpp" +#include "GBufferRenderer.hpp" +#include "ImGuiRenderer.hpp" +#include "ImageBasedLighting.hpp" +#include "LightClustering.hpp" +#include "MeshletCuller.hpp" +#include "RenderResources.hpp" +#include "RenderTargets.hpp" +#include "RtReference.hpp" +#include "SkyboxRenderer.hpp" +#include "TemporalAntiAliasing.hpp" +#include "TextureDebug.hpp" +#include "TextureReadback.hpp" +#include "ToneMap.hpp" +#include "dof/DepthOfField.hpp" +#include "rtdi/RtDirectIllumination.hpp" + +using namespace wheels; +using namespace glm; + +namespace +{ + +constexpr uint32_t sDrawStatsByteSize = 2 
* sizeof(uint32_t); + +void blitFinalComposite( + vk::CommandBuffer cb, ImageHandle finalComposite, + const SwapchainImage &swapImage) +{ + // Blit to support different internal rendering resolution (and color + // format?) in the future + + const StaticArray barriers{{ + *gRenderResources.images->transitionBarrier( + finalComposite, ImageState::TransferSrc, true), + vk::ImageMemoryBarrier2{ + // TODO: + // What's the tight stage for this? Synchronization validation + // complained about a hazard after color attachment write which + // seems like an oddly specific stage for present source access to + // happen in. + .srcStageMask = vk::PipelineStageFlagBits2::eBottomOfPipe, + .srcAccessMask = vk::AccessFlags2{}, + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eTransferWrite, + .oldLayout = vk::ImageLayout::eUndefined, + .newLayout = vk::ImageLayout::eTransferDstOptimal, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapImage.handle, + .subresourceRange = swapImage.subresourceRange, + }, + }}; + + cb.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = asserted_cast(barriers.size()), + .pImageMemoryBarriers = barriers.data(), + }); + + { + PROFILER_CPU_GPU_SCOPE(cb, "BlitFinalComposite"); + + const vk::ImageSubresourceLayers layers{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1}; + + const vk::Extent3D &finalCompositeExtent = + gRenderResources.images->resource(finalComposite).extent; + WHEELS_ASSERT(finalCompositeExtent.width == swapImage.extent.width); + WHEELS_ASSERT(finalCompositeExtent.height == swapImage.extent.height); + const std::array offsets{ + vk::Offset3D{0, 0, 0}, + vk::Offset3D{ + asserted_cast(swapImage.extent.width), + asserted_cast(swapImage.extent.height), + 1, + }, + }; + const auto blit = vk::ImageBlit{ + .srcSubresource = layers, + .srcOffsets = offsets, + 
.dstSubresource = layers, + .dstOffsets = offsets, + }; + cb.blitImage( + gRenderResources.images->nativeHandle(finalComposite), + vk::ImageLayout::eTransferSrcOptimal, swapImage.handle, + vk::ImageLayout::eTransferDstOptimal, 1, &blit, + vk::Filter::eLinear); + } + + { + const vk::ImageMemoryBarrier2 barrier{ + .srcStageMask = vk::PipelineStageFlagBits2::eTransfer, + .srcAccessMask = vk::AccessFlagBits2::eTransferWrite, + // TODO: + // What's the tight stage and correct access for this? + .dstStageMask = vk::PipelineStageFlagBits2::eTransfer, + .dstAccessMask = vk::AccessFlagBits2::eMemoryRead, + .oldLayout = vk::ImageLayout::eTransferDstOptimal, + .newLayout = vk::ImageLayout::ePresentSrcKHR, + .srcQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .dstQueueFamilyIndex = VK_QUEUE_FAMILY_IGNORED, + .image = swapImage.handle, + .subresourceRange = swapImage.subresourceRange, + }; + + cb.pipelineBarrier2(vk::DependencyInfo{ + .imageMemoryBarrierCount = 1, + .pImageMemoryBarriers = &barrier, + }); + } +} + +} // namespace + +Renderer::Renderer() noexcept +: m_lightClustering{OwningPtr{gAllocators.general}} +, m_forwardRenderer{OwningPtr{gAllocators.general}} +, m_gbufferRenderer{OwningPtr{gAllocators.general}} +, m_deferredShading{OwningPtr{gAllocators.general}} +, m_rtDirectIllumination{OwningPtr{gAllocators.general}} +, m_rtReference{OwningPtr{gAllocators.general}} +, m_skyboxRenderer{OwningPtr{gAllocators.general}} +, m_debugRenderer{OwningPtr{gAllocators.general}} +, m_toneMap{OwningPtr{gAllocators.general}} +, m_imguiRenderer{OwningPtr{gAllocators.general}} +, m_textureDebug{OwningPtr{gAllocators.general}} +, m_depthOfField{OwningPtr{gAllocators.general}} +, m_imageBasedLighting{OwningPtr{gAllocators.general}} +, m_temporalAntiAliasing{OwningPtr{gAllocators.general}} +, m_meshletCuller{OwningPtr{gAllocators.general}} +, m_textureReadback{OwningPtr{gAllocators.general}} +{ +} + +// Define here to have the definitions of the member classes available without +// 
including the headers in Renderer.hpp +Renderer::~Renderer() = default; + +void Renderer::init( + wheels::ScopedScratch scopeAlloc, const SwapchainConfig &swapchainConfig, + vk::DescriptorSetLayout camDsLayout, const WorldDSLayouts &worldDsLayouts) +{ + const Timer gpuPassesInitTimer; + m_lightClustering->init( + scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); + m_forwardRenderer->init( + scopeAlloc.child_scope(), + ForwardRenderer::InputDSLayouts{ + .camera = camDsLayout, + .lightClusters = m_lightClustering->descriptorSetLayout(), + .world = worldDsLayouts, + }); + m_gbufferRenderer->init( + scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); + m_deferredShading->init( + scopeAlloc.child_scope(), + DeferredShading::InputDSLayouts{ + .camera = camDsLayout, + .lightClusters = m_lightClustering->descriptorSetLayout(), + .world = worldDsLayouts, + }); + m_rtDirectIllumination->init( + scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); + m_rtReference->init(scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); + m_skyboxRenderer->init( + scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); + m_debugRenderer->init(scopeAlloc.child_scope(), camDsLayout); + m_toneMap->init(scopeAlloc.child_scope()); + m_imguiRenderer->init(swapchainConfig); + m_textureDebug->init(scopeAlloc.child_scope()); + m_depthOfField->init(scopeAlloc.child_scope(), camDsLayout); + m_imageBasedLighting->init(scopeAlloc.child_scope()); + m_temporalAntiAliasing->init(scopeAlloc.child_scope(), camDsLayout); + m_meshletCuller->init( + scopeAlloc.child_scope(), worldDsLayouts, camDsLayout); + m_textureReadback->init(scopeAlloc.child_scope()); + LOG_INFO("GPU pass init took %.2fs", gpuPassesInitTimer.getSeconds()); +} + +void Renderer::startFrame() +{ + gRenderResources.startFrame(); + m_meshletCuller->startFrame(); + m_depthOfField->startFrame(); + m_textureReadback->startFrame(); + + // TODO: + // Is this ok here? 
should it happen after gpu frame starts and we have the + // next swapchain index? + m_imguiRenderer->startFrame(); +} + +void Renderer::recompileShaders( + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDsLayout, + const WorldDSLayouts &worldDsLayouts, + const HashSet &changedFiles) +{ + LOG_INFO("Recompiling shaders"); + + const Timer t; + + m_lightClustering->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout, worldDsLayouts); + m_forwardRenderer->recompileShaders( + scopeAlloc.child_scope(), changedFiles, + ForwardRenderer::InputDSLayouts{ + .camera = camDsLayout, + .lightClusters = m_lightClustering->descriptorSetLayout(), + .world = worldDsLayouts, + }); + m_gbufferRenderer->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout, worldDsLayouts); + m_deferredShading->recompileShaders( + scopeAlloc.child_scope(), changedFiles, + DeferredShading::InputDSLayouts{ + .camera = camDsLayout, + .lightClusters = m_lightClustering->descriptorSetLayout(), + .world = worldDsLayouts, + }); + m_rtDirectIllumination->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout, worldDsLayouts); + m_rtReference->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout, worldDsLayouts); + m_skyboxRenderer->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout, worldDsLayouts); + m_debugRenderer->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout); + m_toneMap->recompileShaders(scopeAlloc.child_scope(), changedFiles); + m_textureDebug->recompileShaders(scopeAlloc.child_scope(), changedFiles); + m_depthOfField->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout); + m_imageBasedLighting->recompileShaders( + scopeAlloc.child_scope(), changedFiles); + m_temporalAntiAliasing->recompileShaders( + scopeAlloc.child_scope(), changedFiles, camDsLayout); + m_meshletCuller->recompileShaders( + scopeAlloc.child_scope(), changedFiles, worldDsLayouts, 
camDsLayout); + + LOG_INFO("Shaders recompiled in %.2fs", t.getSeconds()); +} + +void Renderer::recreateSwapchainAndRelated() +{ + gRenderResources.destroyResources(); +} + +void Renderer::recreateViewportRelated() +{ + gRenderResources.destroyResources(); + + const ImVec2 viewportSize = m_imguiRenderer->centerAreaSize(); + m_viewportExtentInUi = vk::Extent2D{ + asserted_cast(viewportSize.x), + asserted_cast(viewportSize.y), + }; +} + +bool Renderer::drawUi(Camera &cam) +{ + ImGui::SetNextWindowPos(ImVec2{60.f, 235.f}, ImGuiCond_FirstUseEver); + ImGui::Begin( + "Renderer settings ", nullptr, ImGuiWindowFlags_AlwaysAutoResize); + + if (ImGui::Checkbox("Texture Debug", &m_textureDebugActive) && + !m_textureDebugActive) + gRenderResources.images->clearDebug(); + + bool rtDirty = false; + // TODO: Droplist for main renderer type + rtDirty |= ImGui::Checkbox("Reference RT", &m_referenceRt) && m_referenceRt; + rtDirty |= ImGui::Checkbox("Depth of field (WIP)", &m_renderDoF); + ImGui::Checkbox("Temporal Anti-Aliasing", &m_applyTaa); + + if (!m_referenceRt) + { + ImGui::Checkbox("Deferred shading", &m_renderDeferred); + + if (m_renderDeferred) + rtDirty |= ImGui::Checkbox("RT direct illumination", &m_deferredRt); + } + + if (!m_applyTaa) + cam.setJitter(false); + else + { + if (ImGui::CollapsingHeader( + "Temporal Anti-Aliasing", ImGuiTreeNodeFlags_DefaultOpen)) + { + ImGui::Checkbox("Jitter", &m_applyJitter); + cam.setJitter(m_applyJitter); + m_temporalAntiAliasing->drawUi(); + } + } + + if (ImGui::CollapsingHeader("Tone Map", ImGuiTreeNodeFlags_DefaultOpen)) + m_toneMap->drawUi(); + + if (ImGui::CollapsingHeader("Renderer", ImGuiTreeNodeFlags_DefaultOpen)) + { + rtDirty |= enumDropdown("Draw type", m_drawType, sDrawTypeNames); + if (m_referenceRt) + m_rtReference->drawUi(); + else + { + if (m_renderDeferred) + { + if (m_deferredRt) + m_rtDirectIllumination->drawUi(); + } + } + rtDirty |= ImGui::Checkbox("IBL", &m_applyIbl); + } + + ImGui::End(); + + return rtDirty; +} + 
+void Renderer::render( + wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, const Camera &cam, + World &world, const vk::Rect2D &renderArea, const SwapchainImage &swapImage, + const uint32_t nextFrame, const Options &options) +{ + // Clear stats for new frame + DrawStats &drawStats = m_drawStats[nextFrame]; + drawStats = DrawStats{}; + + if (gRenderResources.buffers->isValidHandle(m_gpuDrawStats[nextFrame])) + gRenderResources.buffers->release(m_gpuDrawStats[nextFrame]); + + if (m_applyIbl && !m_imageBasedLighting->isGenerated()) + m_imageBasedLighting->recordGeneration( + scopeAlloc.child_scope(), cb, world, nextFrame); + + const LightClusteringOutput lightClusters = m_lightClustering->record( + scopeAlloc.child_scope(), cb, world, cam, renderArea.extent, nextFrame); + + const BufferHandle gpuDrawStats = gRenderResources.buffers->create( + BufferDescription{ + .byteSize = sDrawStatsByteSize, + .usage = vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eTransferSrc | + vk::BufferUsageFlagBits::eStorageBuffer, + .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, + }, + "DrawStats"); + + gRenderResources.buffers->transition( + cb, gpuDrawStats, BufferState::TransferDst); + cb.fillBuffer( + gRenderResources.buffers->nativeHandle(gpuDrawStats), 0, + sDrawStatsByteSize, 0); + + ImageHandle illumination; + if (m_referenceRt) + { + m_rtDirectIllumination->releasePreserved(); + m_temporalAntiAliasing->releasePreserved(); + + illumination = + m_rtReference + ->record( + scopeAlloc.child_scope(), cb, world, cam, renderArea, + RtReference::Options{ + .depthOfField = m_renderDoF, + .ibl = m_applyIbl, + .colorDirty = options.rtDirty, + .drawType = m_drawType, + }, + nextFrame) + .illumination; + } + else + { + // Need to clean up after toggling rt off to not "leak" the resources + m_rtReference->releasePreserved(); + + ImageHandle velocity; + ImageHandle depth; + // Opaque + if (m_renderDeferred) + { + const GBufferRendererOutput gbuffer = 
m_gbufferRenderer->record( + scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, cam, + renderArea, gpuDrawStats, m_drawType, nextFrame, &drawStats); + + if (m_deferredRt) + illumination = + m_rtDirectIllumination + ->record( + scopeAlloc.child_scope(), cb, world, cam, gbuffer, + options.rtDirty, m_drawType, nextFrame) + .illumination; + else + { + m_rtDirectIllumination->releasePreserved(); + + illumination = m_deferredShading + ->record( + scopeAlloc.child_scope(), cb, world, cam, + DeferredShading::Input{ + .gbuffer = gbuffer, + .lightClusters = lightClusters, + }, + nextFrame, m_applyIbl, m_drawType) + .illumination; + } + + gRenderResources.images->release(gbuffer.albedoRoughness); + gRenderResources.images->release(gbuffer.normalMetalness); + + velocity = gbuffer.velocity; + depth = gbuffer.depth; + } + else + { + m_rtDirectIllumination->releasePreserved(); + + const ForwardRenderer::OpaqueOutput output = + m_forwardRenderer->recordOpaque( + scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, + cam, renderArea, lightClusters, gpuDrawStats, nextFrame, + m_applyIbl, m_drawType, &drawStats); + illumination = output.illumination; + velocity = output.velocity; + depth = output.depth; + } + + m_skyboxRenderer->record( + scopeAlloc.child_scope(), cb, world, cam, + SkyboxRenderer::RecordInOut{ + .illumination = illumination, + .velocity = velocity, + .depth = depth, + }); + + // Transparent + m_forwardRenderer->recordTransparent( + scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, cam, + ForwardRenderer::TransparentInOut{ + .illumination = illumination, + .depth = depth, + }, + lightClusters, gpuDrawStats, nextFrame, m_drawType, &drawStats); + + m_debugRenderer->record( + scopeAlloc.child_scope(), cb, cam, + DebugRenderer::RecordInOut{ + .color = illumination, + .depth = depth, + }, + nextFrame); + + if (options.readbackDepthPx.has_value()) + + m_textureReadback->record( + scopeAlloc.child_scope(), cb, depth, *options.readbackDepthPx, + 
nextFrame); + + if (m_applyTaa) + { + const TemporalAntiAliasing::Output taaOutput = + m_temporalAntiAliasing->record( + scopeAlloc.child_scope(), cb, cam, + TemporalAntiAliasing::Input{ + .illumination = illumination, + .velocity = velocity, + .depth = depth, + }, + nextFrame); + + gRenderResources.images->release(illumination); + illumination = taaOutput.resolvedIllumination; + } + else + m_temporalAntiAliasing->releasePreserved(); + + // TODO: + // Do DoF on raw illumination and have a separate stabilizing TAA pass + // that doesn't blend foreground/background (Karis/Abadie). + if (m_renderDoF) + { + const DepthOfField::Output dofOutput = m_depthOfField->record( + scopeAlloc.child_scope(), cb, cam, + DepthOfField::Input{ + .illumination = illumination, + .depth = depth, + }, + nextFrame); + + gRenderResources.images->release(illumination); + illumination = dofOutput.combinedIlluminationDoF; + } + + gRenderResources.images->release(velocity); + gRenderResources.images->release(depth); + } + gRenderResources.images->release(lightClusters.pointers); + gRenderResources.texelBuffers->release(lightClusters.indicesCount); + gRenderResources.texelBuffers->release(lightClusters.indices); + + const ImageHandle toneMapped = + m_toneMap->record(scopeAlloc.child_scope(), cb, illumination, nextFrame) + .toneMapped; + + gRenderResources.images->release(illumination); + + ImageHandle finalComposite; + if (m_textureDebugActive) + { + const ImVec2 size = m_imguiRenderer->centerAreaSize(); + const ImVec2 offset = m_imguiRenderer->centerAreaOffset(); + const CursorState cursor = gInputHandler.cursor(); + + // Have magnifier when mouse is on (an active) debug view + const bool uiHovered = ImGui::IsAnyItemHovered(); + const bool activeTexture = m_textureDebug->textureSelected(); + const bool cursorWithinArea = + all(greaterThan(cursor.position, vec2(offset.x, offset.y))) && + all(lessThan( + cursor.position, vec2(offset.x + size.x, offset.y + size.y))); + + Optional cursorCoord; + // 
Don't have debug magnifier when using ui that overlaps the render + // area + if (!uiHovered && activeTexture && cursorWithinArea) + { + // Also don't have magnifier when e.g. mouse look is active. Let + // InputHandler figure out if mouse should be visible or not. + if (!gInputHandler.mouseGesture().has_value()) + { + // The magnifier has its own pointer so let's not mask the view + // with the OS one. + gInputHandler.hideCursor(); + cursorCoord = cursor.position - vec2(offset.x, offset.y); + } + } + else + gInputHandler.showCursor(); + + const ImageHandle debugOutput = m_textureDebug->record( + scopeAlloc.child_scope(), cb, renderArea.extent, cursorCoord, + nextFrame); + + finalComposite = blitColorToFinalComposite( + scopeAlloc.child_scope(), cb, debugOutput, swapImage.extent, + options.drawUi); + + gRenderResources.images->release(debugOutput); + } + else + finalComposite = blitColorToFinalComposite( + scopeAlloc.child_scope(), cb, toneMapped, swapImage.extent, + options.drawUi); + + gRenderResources.images->release(toneMapped); + + if (options.drawUi) + { + world.drawDeferredLoadingUi(); + + if (m_textureDebugActive) + // Draw this after so that the first frame debug is active for a new + // texture, we draw black instead of a potentially wrong output from + // the shared texture that wasn't protected yet + m_textureDebug->drawUi(nextFrame); + + const vk::Rect2D backbufferArea{ + .offset = {0, 0}, + .extent = swapImage.extent, + }; + m_imguiRenderer->endFrame(cb, backbufferArea, finalComposite); + } + + blitFinalComposite(cb, finalComposite, swapImage); + + gRenderResources.images->release(finalComposite); + + readbackDrawStats(cb, nextFrame, gpuDrawStats); + + gRenderResources.buffers->release(gpuDrawStats); + + // Need to preserve both the new and old readback buffers. Release happens + // after the readback is read from when nextFrame wraps around. 
+ for (const BufferHandle buffer : m_gpuDrawStats) + { + if (gRenderResources.buffers->isValidHandle(buffer)) + gRenderResources.buffers->preserve(buffer); + } +} + +const DrawStats &Renderer::drawStats(uint32_t nextFrame) +{ + DrawStats &ret = m_drawStats[nextFrame]; + const BufferHandle gpuStatsHandle = m_gpuDrawStats[nextFrame]; + if (gRenderResources.buffers->isValidHandle(gpuStatsHandle)) + { + const uint32_t *readbackPtr = static_cast( + gRenderResources.buffers->resource(gpuStatsHandle).mapped); + WHEELS_ASSERT(readbackPtr != nullptr); + + ret.drawnMeshletCount = readbackPtr[0]; + ret.rasterizedTriangleCount = readbackPtr[1]; + } + return ret; +} + +const vk::Extent2D &Renderer::viewportExtentInUi() const +{ + return m_viewportExtentInUi; +} + +bool Renderer::viewportResized() const +{ + const ImVec2 viewportSize = m_imguiRenderer->centerAreaSize(); + const bool resized = + asserted_cast(viewportSize.x) != m_viewportExtentInUi.width || + asserted_cast(viewportSize.y) != m_viewportExtentInUi.height; + + return resized; +} + +vec2 Renderer::viewportOffsetInUi() const +{ + const ImVec2 offset = m_imguiRenderer->centerAreaOffset(); + return vec2{offset.x, offset.y}; +} + +float Renderer::lodBias() const { return m_applyTaa ? 
-1.f : 0.f; } + +bool Renderer::rtInUse() const { return m_referenceRt || m_deferredRt; } + +Optional Renderer::tryDepthReadback() +{ + return m_textureReadback->readback(); +} + +bool Renderer::depthAvailable() const { return !m_referenceRt; } + +ImageHandle Renderer::blitColorToFinalComposite( + ScopedScratch scopeAlloc, vk::CommandBuffer cb, ImageHandle toneMapped, + const vk::Extent2D &swapImageExtent, bool drawUi) +{ + const ImageHandle finalComposite = gRenderResources.images->create( + ImageDescription{ + .format = sFinalCompositeFormat, + .width = swapImageExtent.width, + .height = swapImageExtent.height, + .usageFlags = + vk::ImageUsageFlagBits::eColorAttachment | // Render + vk::ImageUsageFlagBits::eTransferDst | // Blit from tone + // mapped + vk::ImageUsageFlagBits::eTransferSrc, // Blit to swap image + }, + "finalComposite"); + + // Blit tonemapped into cleared final composite before drawing ui on top + transition( + WHEELS_MOV(scopeAlloc), cb, + Transitions{ + .images = StaticArray{{ + {toneMapped, ImageState::TransferSrc}, + {finalComposite, ImageState::TransferDst}, + }}, + }); + + // This scope has a barrier, but that's intentional as it should contain + // both the clear and the blit + PROFILER_CPU_GPU_SCOPE(cb, "blitColorToFinalComposite"); + + const vk::ClearColorValue clearColor{0.f, 0.f, 0.f, 0.f}; + const vk::ImageSubresourceRange subresourceRange{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .baseMipLevel = 0, + .levelCount = 1, + .baseArrayLayer = 0, + .layerCount = 1, + }; + cb.clearColorImage( + gRenderResources.images->nativeHandle(finalComposite), + vk::ImageLayout::eTransferDstOptimal, &clearColor, 1, + &subresourceRange); + + // Memory barrier for finalComposite, layout is already correct + cb.pipelineBarrier( + vk::PipelineStageFlagBits::eTransfer, + vk::PipelineStageFlagBits::eTransfer, vk::DependencyFlags{}, + { + vk::MemoryBarrier{ + .srcAccessMask = vk::AccessFlagBits::eTransferWrite, + .dstAccessMask = 
vk::AccessFlagBits::eTransferWrite, + }, + }, + {}, {}); + + const vk::ImageSubresourceLayers layers{ + .aspectMask = vk::ImageAspectFlagBits::eColor, + .mipLevel = 0, + .baseArrayLayer = 0, + .layerCount = 1}; + + const vk::Extent3D toneMappedExtent = + gRenderResources.images->resource(toneMapped).extent; + const std::array srcOffsets{ + vk::Offset3D{0, 0, 0}, + vk::Offset3D{ + asserted_cast(toneMappedExtent.width), + asserted_cast(toneMappedExtent.height), + 1, + }, + }; + + ivec2 dstOffset; + ivec2 dstSize; + if (drawUi) + { + const ImVec2 offset = m_imguiRenderer->centerAreaOffset(); + const ImVec2 size = m_imguiRenderer->centerAreaSize(); + dstOffset = ivec2{static_cast(offset.x), offset.y}; + dstSize = ivec2{size.x, size.y}; + } + else + { + dstOffset = ivec2{0, 0}; + dstSize = ivec2{ + asserted_cast(swapImageExtent.width), + asserted_cast(swapImageExtent.height), + }; + } + + const std::array dstOffsets{ + vk::Offset3D{ + std::min( + dstOffset.x, asserted_cast(swapImageExtent.width - 1)), + std::min( + dstOffset.y, + asserted_cast(swapImageExtent.height - 1)), + 0, + }, + vk::Offset3D{ + std::min( + asserted_cast(dstOffset.x + dstSize.x), + asserted_cast(swapImageExtent.width)), + std::min( + asserted_cast(dstOffset.y + dstSize.y), + asserted_cast(swapImageExtent.height)), + 1, + }, + }; + const vk::ImageBlit blit = { + .srcSubresource = layers, + .srcOffsets = srcOffsets, + .dstSubresource = layers, + .dstOffsets = dstOffsets, + }; + cb.blitImage( + gRenderResources.images->nativeHandle(toneMapped), + vk::ImageLayout::eTransferSrcOptimal, + gRenderResources.images->nativeHandle(finalComposite), + vk::ImageLayout::eTransferDstOptimal, 1, &blit, vk::Filter::eLinear); + + return finalComposite; +} + +void Renderer::readbackDrawStats( + vk::CommandBuffer cb, uint32_t nextFrame, BufferHandle srcBuffer) +{ + BufferHandle &dstBuffer = m_gpuDrawStats[nextFrame]; + WHEELS_ASSERT(!gRenderResources.buffers->isValidHandle(dstBuffer)); + dstBuffer = 
gRenderResources.buffers->create( + BufferDescription{ + .byteSize = sDrawStatsByteSize, + .usage = vk::BufferUsageFlagBits::eTransferDst, + .properties = vk::MemoryPropertyFlagBits::eHostVisible | + vk::MemoryPropertyFlagBits::eHostCoherent, + }, + "DrawStatsReadback"); + WHEELS_ASSERT( + gRenderResources.buffers->resource(srcBuffer).byteSize == + gRenderResources.buffers->resource(dstBuffer).byteSize); + + const StaticArray barriers{{ + *gRenderResources.buffers->transitionBarrier( + srcBuffer, BufferState::TransferSrc, true), + *gRenderResources.buffers->transitionBarrier( + dstBuffer, BufferState::TransferDst, true), + }}; + + cb.pipelineBarrier2(vk::DependencyInfo{ + .bufferMemoryBarrierCount = asserted_cast(barriers.size()), + .pBufferMemoryBarriers = barriers.data(), + }); + + const vk::BufferCopy region{ + .srcOffset = 0, + .dstOffset = 0, + .size = sDrawStatsByteSize, + }; + cb.copyBuffer( + gRenderResources.buffers->nativeHandle(srcBuffer), + gRenderResources.buffers->nativeHandle(dstBuffer), 1, &region); +} diff --git a/src/render/Renderer.hpp b/src/render/Renderer.hpp new file mode 100644 index 00000000..ac2a34c8 --- /dev/null +++ b/src/render/Renderer.hpp @@ -0,0 +1,110 @@ +#ifndef PROSPER_RENDER_RENDERER_HPP +#define PROSPER_RENDER_RENDERER_HPP + +#include "../gfx/Fwd.hpp" +#include "../scene/DrawType.hpp" +#include "../scene/Fwd.hpp" +#include "../utils/Utils.hpp" +#include "DrawStats.hpp" +#include "Fwd.hpp" +#include "RenderResourceHandle.hpp" +#include +#include +#include +#include +#include +#include +#include + +class Renderer +{ + public: + Renderer() noexcept; + ~Renderer(); + + Renderer(const Renderer &other) = delete; + Renderer(Renderer &&other) = delete; + Renderer &operator=(const Renderer &other) = delete; + Renderer &operator=(Renderer &&other) = delete; + + void init( + wheels::ScopedScratch scopeAlloc, + const SwapchainConfig &swapchainConfig, + vk::DescriptorSetLayout camDsLayout, + const WorldDSLayouts &worldDsLayouts); + + void 
recompileShaders( + wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDsLayout, + const WorldDSLayouts &worldDsLayouts, + const wheels::HashSet &changedFiles); + static void recreateSwapchainAndRelated(); + void recreateViewportRelated(); + + void startFrame(); + // Returns true if rt should be marked dirty + [[nodiscard]] bool drawUi(Camera &cam); + + [[nodiscard]] const DrawStats &drawStats(uint32_t nextFrame); + [[nodiscard]] const vk::Extent2D &viewportExtentInUi() const; + // Returns true if the held viewport extent doesn't match the current one + [[nodiscard]] bool viewportResized() const; + [[nodiscard]] glm::vec2 viewportOffsetInUi() const; + [[nodiscard]] float lodBias() const; + [[nodiscard]] bool rtInUse() const; + [[nodiscard]] wheels::Optional tryDepthReadback(); + [[nodiscard]] bool depthAvailable() const; + + struct Options + { + bool rtDirty{false}; + bool drawUi{false}; + wheels::Optional readbackDepthPx; + }; + void render( + wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, + const Camera &cam, World &world, const vk::Rect2D &renderArea, + const SwapchainImage &swapImage, const uint32_t nextFrame, + const Options &options); + + private: + [[nodiscard]] ImageHandle blitColorToFinalComposite( + wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, + ImageHandle toneMapped, const vk::Extent2D &swapImageExtent, + bool drawUi); + void readbackDrawStats( + vk::CommandBuffer cb, uint32_t nextFrame, BufferHandle srcBuffer); + + wheels::OwningPtr m_lightClustering; + wheels::OwningPtr m_forwardRenderer; + wheels::OwningPtr m_gbufferRenderer; + wheels::OwningPtr m_deferredShading; + wheels::OwningPtr m_rtDirectIllumination; + wheels::OwningPtr m_rtReference; + wheels::OwningPtr m_skyboxRenderer; + wheels::OwningPtr m_debugRenderer; + wheels::OwningPtr m_toneMap; + wheels::OwningPtr m_imguiRenderer; + wheels::OwningPtr m_textureDebug; + wheels::OwningPtr m_depthOfField; + wheels::OwningPtr m_imageBasedLighting; + wheels::OwningPtr 
m_temporalAntiAliasing; + wheels::OwningPtr m_meshletCuller; + wheels::OwningPtr m_textureReadback; + + wheels::StaticArray m_drawStats; + wheels::StaticArray m_gpuDrawStats; + + vk::Extent2D m_viewportExtentInUi{}; + + bool m_textureDebugActive{false}; + bool m_referenceRt{false}; + bool m_renderDeferred{true}; + bool m_deferredRt{false}; + bool m_renderDoF{false}; + bool m_applyIbl{false}; + bool m_applyTaa{true}; + bool m_applyJitter{true}; + DrawType m_drawType{DrawType::Default}; +}; + +#endif // PROSPER_RENDER_RENDERER_HPP diff --git a/src/utils/SceneStats.hpp b/src/utils/SceneStats.hpp index 022707d6..6ddd57b5 100644 --- a/src/utils/SceneStats.hpp +++ b/src/utils/SceneStats.hpp @@ -6,10 +6,6 @@ struct SceneStats { - uint32_t totalTriangleCount{0}; - uint32_t totalMeshletCount{0}; - uint32_t totalMeshCount{0}; - uint32_t totalModelCount{0}; uint32_t totalNodeCount{0}; uint32_t animatedNodeCount{0}; }; From eb1cf9854fc04424355f430a1ce822ca7ae07029 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Fri, 12 Jul 2024 14:13:08 +0300 Subject: [PATCH 04/12] Add HierarchicalDepthDownsampler --- res/shader/hiz_downsampler.comp | 103 ++++++++++ src/render/CMakeLists.txt | 2 + src/render/Fwd.hpp | 3 + src/render/HierarchicalDepthDownsampler.cpp | 208 ++++++++++++++++++++ src/render/HierarchicalDepthDownsampler.hpp | 45 +++++ 5 files changed, 361 insertions(+) create mode 100644 res/shader/hiz_downsampler.comp create mode 100644 src/render/HierarchicalDepthDownsampler.cpp create mode 100644 src/render/HierarchicalDepthDownsampler.hpp diff --git a/res/shader/hiz_downsampler.comp b/res/shader/hiz_downsampler.comp new file mode 100644 index 00000000..dc1bb508 --- /dev/null +++ b/res/shader/hiz_downsampler.comp @@ -0,0 +1,103 @@ +#pragma shader_stage(compute) + +#extension GL_EXT_shader_image_load_formatted : require + +// Based on A Life of a Bokeh by Guillaume Abadie +// https://advances.realtimerendering.com/s2018/index.htm +// implemented using 
https://github.com/GPUOpen-Effects/FidelityFX-SPD + +layout(set = 0, binding = 0) uniform texture2D depthSrc; +layout(set = 0, binding = 1) uniform sampler depthSampler; +layout(set = 0, binding = 2) uniform coherent image2D depthDst[12]; +layout(std430, set = 0, binding = 3) coherent buffer SpdGlobalAtomicBuffer +{ + uint counter; +} +spdGlobalAtomic; + +layout(push_constant) uniform ReducePC +{ + ivec2 topMipResolution; + uint numWorkGroupsPerSlice; + uint mips; +} +PC; + +#define A_GPU 1 +#define A_GLSL 1 +#include "ext/ffx_a.h" + +shared AF4 spdIntermediate[16][16]; +shared AU1 spdCounter; + +AF4 SpdLoadSourceImage(ASU2 p, AU1 slice) +{ + // Clamp to edge + p = min(p, PC.topMipResolution - 1); + + // TODO: + // Single fetch per pixel feels excessive instead of 4 texel gather. Does + // SPD support the latter without hacking? + float nonLinearDepth = + texelFetch(sampler2D(depthSrc, depthSampler), p, 0).x; + + return AF4(nonLinearDepth, 0, 0, 0); +} + +AF4 SpdLoad(ASU2 p, AU1 slice) +{ + // Clamp to edge + ASU2 mip5Res = max(PC.topMipResolution >> 5, ASU2(1)); + p = min(p, mip5Res - 1); + + // TODO: + // Single fetch per pixel feels excessive instead of 4 texel gather. Does + // SPD support the latter without hacking? Is it even possible from a + // image2D? 
+ float nonLinearDepth = imageLoad(depthDst[5], p).x; + + return AF4(nonLinearDepth, 0, 0, 0); +} + +void SpdStore(ASU2 p, AF4 value, AU1 mip, AU1 slice) +{ + // Skip writes that would have gone over + ASU2 mipRes = max(PC.topMipResolution >> mip, ASU2(1)); + if (any(greaterThanEqual(p, mipRes))) + return; + + imageStore(depthDst[mip], p, AF4(value.x, 0, 0, 0)); +} + +void SpdIncreaseAtomicCounter(AU1 slice) +{ + spdCounter = atomicAdd(spdGlobalAtomic.counter, 1); +} + +AF4 SpdLoadIntermediate(AU1 x, AU1 y) { return spdIntermediate[x][y]; } + +void SpdStoreIntermediate(AU1 x, AU1 y, AF4 value) +{ + spdIntermediate[x][y] = AF4(value.x, 0, 0, 0); +} + +AU1 SpdGetAtomicCounter() { return spdCounter; } + +void SpdResetAtomicCounter(AU1 slice) { spdGlobalAtomic.counter = 0; } + +AF4 SpdReduce4(AF4 v0, AF4 v1, AF4 v2, AF4 v3) +{ + // Keep the furthest away sample. + // Reverse-Z so the furthest away sample is the smallest. + return AF4(min(min(v0.x, v1.x), min(v2.x, v3.x)), 0, 0, 0); +} + +#include "ext/ffx_spd.h" + +layout(local_size_x = GROUP_X) in; +void main() +{ + SpdDownsample( + AU2(gl_WorkGroupID.xy), AU1(gl_LocalInvocationIndex), AU1(PC.mips), + AU1(PC.numWorkGroupsPerSlice), AU1(gl_WorkGroupID.z)); +} diff --git a/src/render/CMakeLists.txt b/src/render/CMakeLists.txt index 8bb526d8..eabb3b71 100644 --- a/src/render/CMakeLists.txt +++ b/src/render/CMakeLists.txt @@ -9,6 +9,7 @@ set(PROSPER_RENDER_INCLUDES ${CMAKE_CURRENT_LIST_DIR}/DeferredShading.hpp ${CMAKE_CURRENT_LIST_DIR}/ForwardRenderer.hpp ${CMAKE_CURRENT_LIST_DIR}/GBufferRenderer.hpp + ${CMAKE_CURRENT_LIST_DIR}/HierarchicalDepthDownsampler.hpp ${CMAKE_CURRENT_LIST_DIR}/ImageBasedLighting.hpp ${CMAKE_CURRENT_LIST_DIR}/ImGuiRenderer.hpp ${CMAKE_CURRENT_LIST_DIR}/LightClustering.hpp @@ -37,6 +38,7 @@ set(PROSPER_RENDER_SOURCES ${CMAKE_CURRENT_LIST_DIR}/DeferredShading.cpp ${CMAKE_CURRENT_LIST_DIR}/ForwardRenderer.cpp ${CMAKE_CURRENT_LIST_DIR}/GBufferRenderer.cpp + 
${CMAKE_CURRENT_LIST_DIR}/HierarchicalDepthDownsampler.cpp ${CMAKE_CURRENT_LIST_DIR}/ImageBasedLighting.cpp ${CMAKE_CURRENT_LIST_DIR}/ImGuiRenderer.cpp ${CMAKE_CURRENT_LIST_DIR}/LightClustering.cpp diff --git a/src/render/Fwd.hpp b/src/render/Fwd.hpp index ba602e17..aad75f43 100644 --- a/src/render/Fwd.hpp +++ b/src/render/Fwd.hpp @@ -23,6 +23,9 @@ class ForwardRenderer; class GBufferRenderer; struct GBufferRendererOutput; +// HierarchicalDepthDownsampler.hpp +class HierarchicalDepthDownsampler; + // ImageBasedLighting.hpp class ImageBasedLighting; diff --git a/src/render/HierarchicalDepthDownsampler.cpp b/src/render/HierarchicalDepthDownsampler.cpp new file mode 100644 index 00000000..6cb24a03 --- /dev/null +++ b/src/render/HierarchicalDepthDownsampler.cpp @@ -0,0 +1,208 @@ +#include "HierarchicalDepthDownsampler.hpp" + +#include "../gfx/VkUtils.hpp" +#include "../utils/Profiler.hpp" +#include "../utils/Utils.hpp" +#include "RenderResources.hpp" + +using namespace glm; +using namespace wheels; + +namespace +{ + +const uint32_t sGroupSizeX = 256u; +const uint32_t sMaxMips = 12; +const vk::Format sHierarchicalDepthFormat = vk::Format::eR32Sfloat; + +// Ported from ffx_spd.h, removed mip and offset calculations +void SpdSetup( + uvec2 &dispatchThreadGroupCountXY, uint32_t &numWorkGroups, + const uvec4 &rectInfo) +{ + const uint32_t endIndexX = (rectInfo[0] + rectInfo[2] - 1) / + 64; // rectInfo[0] = left, rectInfo[2] = width + const uint32_t endIndexY = (rectInfo[1] + rectInfo[3] - 1) / + 64; // rectInfo[1] = top, rectInfo[3] = height + + dispatchThreadGroupCountXY[0] = endIndexX + 1; + dispatchThreadGroupCountXY[1] = endIndexY + 1; + + numWorkGroups = + (dispatchThreadGroupCountXY[0]) * (dispatchThreadGroupCountXY[1]); +} + +struct PCBlock +{ + ivec2 topMipResolution; + uint32_t numWorkGroupsPerSlice; + uint32_t mips; +}; + +ComputePass::Shader shaderDefinitionCallback(Allocator &alloc) +{ + return ComputePass::Shader{ + .relPath = "shader/hiz_downsampler.comp", + 
.debugName = String{alloc, "HierarchicalDepthDownsamplerCS"}, + .groupSize = uvec3{sGroupSizeX, 1u, 1u}, + }; +} + +} // namespace + +HierarchicalDepthDownsampler::~HierarchicalDepthDownsampler() +{ + // Don't check for m_initialized as we might be cleaning up after a failed + // init. + gDevice.destroy(m_atomicCounter); +} + +void HierarchicalDepthDownsampler::init(ScopedScratch scopeAlloc) +{ + WHEELS_ASSERT(!m_initialized); + + m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); + // Don't use a shared resource as this is tiny and the clear can be skipped + // after the first frame if we know nothing else uses it. + m_atomicCounter = gDevice.createBuffer(BufferCreateInfo{ + .desc = + BufferDescription{ + .byteSize = sizeof(uint32_t), + .usage = vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageBuffer, + .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, + }, + .debugName = "HizDownsamplerCounter"}); + + m_initialized = true; +} + +void HierarchicalDepthDownsampler::recompileShaders( + wheels::ScopedScratch scopeAlloc, + const HashSet &changedFiles) +{ + WHEELS_ASSERT(m_initialized); + + m_computePass.recompileShader( + WHEELS_MOV(scopeAlloc), changedFiles, shaderDefinitionCallback); +} + +ImageHandle HierarchicalDepthDownsampler::record( + ScopedScratch scopeAlloc, vk::CommandBuffer cb, + ImageHandle inNonLinearDepth, const uint32_t nextFrame) +{ + WHEELS_ASSERT(m_initialized); + + const char *const passName = "HiZDownsampler"; + PROFILER_CPU_SCOPE(passName); + + const Image &inDepth = gRenderResources.images->resource(inNonLinearDepth); + WHEELS_ASSERT( + inDepth.format == vk::Format::eD32Sfloat && + "Input depth precision doesn't match HiZ format"); + WHEELS_ASSERT(inDepth.extent.depth == 1); + // 1 px wide/tall inputs won't behave well, but also probably won't happen + WHEELS_ASSERT(inDepth.extent.width > 1); + WHEELS_ASSERT(inDepth.extent.height > 1); + + // Only floor as we want the number of mips to generate, 
not including mip 0 + const uint32_t hizMipCount = + asserted_cast(floor(std::log2((static_cast( + std::max(inDepth.extent.width, inDepth.extent.height)))))); + // This should work up to 4k + WHEELS_ASSERT(hizMipCount <= sMaxMips); + const uint32_t hizMip0Width = inDepth.extent.width >> 1; + const uint32_t hizMip0Height = inDepth.extent.height >> 1; + + uvec2 dispatchThreadGroupCountXY{}; + PCBlock pcBlock{ + .topMipResolution = + ivec2{ + asserted_cast(inDepth.extent.width), + asserted_cast(inDepth.extent.height), + }, + .mips = hizMipCount, + }; + const uvec4 rectInfo{0, 0, inDepth.extent.width, inDepth.extent.height}; + SpdSetup( + dispatchThreadGroupCountXY, pcBlock.numWorkGroupsPerSlice, rectInfo); + + const ImageHandle outHierarchicalDepth = gRenderResources.images->create( + ImageDescription{ + .format = sHierarchicalDepthFormat, + .width = hizMip0Width, + .height = hizMip0Height, + .mipCount = hizMipCount, + .usageFlags = vk::ImageUsageFlagBits::eSampled | + vk::ImageUsageFlagBits::eStorage, + }, + "HierarchicalDepth"); + + const Span mipViews = + gRenderResources.images->subresourceViews(outHierarchicalDepth); + + StaticArray outputInfos; + { + size_t i = 0; + for (; i < hizMipCount; ++i) + outputInfos[i] = vk::DescriptorImageInfo{ + .imageView = mipViews[i], + .imageLayout = vk::ImageLayout::eGeneral, + }; + // Fill the remaining descriptors with copies of the first one so we + // won't have unbound descriptors. We could use VK_EXT_robustness2 and + // null descriptors, but this seems like less of a hassle since we + // shouldn't be accessing them anyway. 
+ for (; i < sMaxMips; ++i) + outputInfos[i] = vk::DescriptorImageInfo{ + .imageView = mipViews[0], + .imageLayout = vk::ImageLayout::eGeneral, + }; + } + + m_computePass.updateDescriptorSet( + scopeAlloc.child_scope(), nextFrame, + StaticArray{{ + DescriptorInfo{vk::DescriptorImageInfo{ + .imageView = inDepth.view, + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + }}, + DescriptorInfo{vk::DescriptorImageInfo{ + .sampler = gRenderResources.nearestSampler, + }}, + DescriptorInfo{outputInfos}, + DescriptorInfo{vk::DescriptorBufferInfo{ + .buffer = m_atomicCounter.handle, + .range = VK_WHOLE_SIZE, + }}, + }}); + + transition( + WHEELS_MOV(scopeAlloc), cb, + Transitions{ + .images = StaticArray{{ + {inNonLinearDepth, ImageState::ComputeShaderSampledRead}, + {outHierarchicalDepth, ImageState::ComputeShaderReadWrite}, + }}, + }); + + if (m_counterNotCleared) + { + m_atomicCounter.transition(cb, BufferState::TransferDst); + // Only need to clear once as SPD will leave this zeroed when the + // dispatch exits + cb.fillBuffer(m_atomicCounter.handle, 0, m_atomicCounter.byteSize, 0); + m_atomicCounter.transition(cb, BufferState::ComputeShaderReadWrite); + m_counterNotCleared = false; + } + + PROFILER_GPU_SCOPE(cb, passName); + + const vk::DescriptorSet descriptorSet = m_computePass.storageSet(nextFrame); + + const uvec3 groupCount = + uvec3{dispatchThreadGroupCountXY[0], dispatchThreadGroupCountXY[1], 1u}; + m_computePass.record(cb, pcBlock, groupCount, Span{&descriptorSet, 1}); + + return outHierarchicalDepth; +} diff --git a/src/render/HierarchicalDepthDownsampler.hpp b/src/render/HierarchicalDepthDownsampler.hpp new file mode 100644 index 00000000..b0f306a9 --- /dev/null +++ b/src/render/HierarchicalDepthDownsampler.hpp @@ -0,0 +1,45 @@ +#ifndef PROSPER_RENDER_HIERARCHICAL_DEPTH_DOWNSAMPLER_HPP +#define PROSPER_RENDER_HIERARCHICAL_DEPTH_DOWNSAMPLER_HPP + +#include +#include + +#include "../gfx/Fwd.hpp" +#include "../gfx/Resources.hpp" +#include "ComputePass.hpp" 
+#include "Fwd.hpp" +#include "RenderResourceHandle.hpp" + +class HierarchicalDepthDownsampler +{ + public: + HierarchicalDepthDownsampler() noexcept = default; + ~HierarchicalDepthDownsampler(); + + HierarchicalDepthDownsampler(const HierarchicalDepthDownsampler &other) = + delete; + HierarchicalDepthDownsampler(HierarchicalDepthDownsampler &&other) = delete; + HierarchicalDepthDownsampler &operator=( + const HierarchicalDepthDownsampler &other) = delete; + HierarchicalDepthDownsampler &operator=( + HierarchicalDepthDownsampler &&other) = delete; + + void init(wheels::ScopedScratch scopeAlloc); + + void recompileShaders( + wheels::ScopedScratch scopeAlloc, + const wheels::HashSet &changedFiles); + + // Downsamples a depth pyramid, keeping it non-linear to match the input. + [[nodiscard]] ImageHandle record( + wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, + ImageHandle inNonLinearDepth, uint32_t nextFrame); + + private: + bool m_initialized{false}; + ComputePass m_computePass; + Buffer m_atomicCounter; + bool m_counterNotCleared{true}; +}; + +#endif // PROSPER_RENDER_HIERARCHICAL_DEPTH_DOWNSAMPLER_HPP From 38d756f9f2f156cde4ea0a06a8318360613a2334 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Sat, 13 Jul 2024 10:43:53 +0300 Subject: [PATCH 05/12] ComputePass: Add record with pc and argument buffer --- src/render/ComputePass.cpp | 36 ++++++++++++++++++++++++++++++++++++ src/render/ComputePass.hpp | 31 ++++++++++++++++++++++++++++--- 2 files changed, 64 insertions(+), 3 deletions(-) diff --git a/src/render/ComputePass.cpp b/src/render/ComputePass.cpp index a4528075..ac29dcce 100644 --- a/src/render/ComputePass.cpp +++ b/src/render/ComputePass.cpp @@ -235,6 +235,42 @@ void ComputePass::record( } } +void ComputePass::record( + vk::CommandBuffer cb, wheels::Span pcBlockBytes, + vk::Buffer argumentBuffer, + wheels::Span descriptorSets, + wheels::Span dynamicOffsets) +{ + WHEELS_ASSERT(m_initialized); + + 
WHEELS_ASSERT(m_shaderReflection.has_value()); + WHEELS_ASSERT( + pcBlockBytes.size() == m_shaderReflection->pushConstantsBytesize()); + WHEELS_ASSERT( + dynamicOffsets.size() < sMaxDynamicOffsets && + "At least some AMD and Intel drivers limit this to 8 per buffer type. " + "Let's keep the total under if possible to keep things simple."); + + cb.bindPipeline(vk::PipelineBindPoint::eCompute, m_pipeline); + + cb.bindDescriptorSets( + vk::PipelineBindPoint::eCompute, m_pipelineLayout, 0, // firstSet + asserted_cast(descriptorSets.size()), descriptorSets.data(), + asserted_cast(dynamicOffsets.size()), dynamicOffsets.data()); + + cb.pushConstants( + m_pipelineLayout, vk::ShaderStageFlagBits::eCompute, 0, + asserted_cast(pcBlockBytes.size()), pcBlockBytes.data()); + + cb.dispatchIndirect(argumentBuffer, 0); + + if (m_storageSets[0].size() > 1) + { + // This can equal perFrameRecordLimit if all of them are used + m_nextRecordIndex++; + } +} + void ComputePass::destroyPipelines() { gDevice.logical().destroy(m_pipeline); diff --git a/src/render/ComputePass.hpp b/src/render/ComputePass.hpp index cd7b16d4..9b342dab 100644 --- a/src/render/ComputePass.hpp +++ b/src/render/ComputePass.hpp @@ -74,19 +74,19 @@ class ComputePass // m_groupSize threads per group [[nodiscard]] glm::uvec3 groupCount(glm::uvec3 inputSize) const; - // Increments the conuter for descriptor sets. + // Increments the counter for descriptor sets. void record( vk::CommandBuffer cb, const glm::uvec3 &groupCount, wheels::Span descriptorSets, wheels::Span dynamicOffsets = {}); - // Increments the conuter for descriptor sets. + // Increments the counter for descriptor sets. void record( vk::CommandBuffer cb, vk::Buffer argumentBuffer, wheels::Span descriptorSets, wheels::Span dynamicOffsets = {}); - // Increments the conuter for descriptor sets. + // Increments the counter for descriptor sets. 
template void record( vk::CommandBuffer cb, const PCBlock &pcBlock, @@ -94,6 +94,12 @@ class ComputePass wheels::Span descriptorSets, wheels::Span dynamicOffsets = {}); + template + void record( + vk::CommandBuffer cb, const PCBlock &pcBlock, vk::Buffer argumentBuffer, + wheels::Span descriptorSets, + wheels::Span dynamicOffsets = {}); + private: [[nodiscard]] bool compileShader( wheels::ScopedScratch scopeAlloc, @@ -105,6 +111,12 @@ class ComputePass wheels::Span descriptorSets, wheels::Span dynamicOffsets = {}); + void record( + vk::CommandBuffer cb, wheels::Span pcBlockBytes, + vk::Buffer argumentBuffer, + wheels::Span descriptorSets, + wheels::Span dynamicOffsets = {}); + void destroyPipelines(); void createDescriptorSets( @@ -149,4 +161,17 @@ void ComputePass::record( groupCount, descriptorSets, dynamicOffsets); } +template +void ComputePass::record( + vk::CommandBuffer cb, const PCBlock &pcBlock, vk::Buffer argumentBuffer, + wheels::Span descriptorSets, + wheels::Span dynamicOffsets) +{ + record( + cb, + wheels::Span{ + reinterpret_cast(&pcBlock), sizeof(pcBlock)}, + argumentBuffer, descriptorSets, dynamicOffsets); +} + #endif // PROSPER_RENDER_COMPUTE_PASS_HPP From f9754a799bfd71fa8cde6920b5de4f531ca49bd9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Sun, 14 Jul 2024 15:09:38 +0300 Subject: [PATCH 06/12] RenderImageCollection: Be nice and return also a single mip view from subresourceView() --- src/render/RenderImageCollection.cpp | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/src/render/RenderImageCollection.cpp b/src/render/RenderImageCollection.cpp index e695d887..a2d7454e 100644 --- a/src/render/RenderImageCollection.cpp +++ b/src/render/RenderImageCollection.cpp @@ -31,6 +31,11 @@ wheels::Span RenderImageCollection::subresourceViews( if (views.empty()) { const Image &image = resource(handle); + // Let's be nice and return the single mip view for ergonomics in cases + // where the logical resource might have one or 
many mips. + if (image.mipCount == 1) + return Span{&image.view, 1}; + views.resize(image.subresourceRange.levelCount); // TODO: // Isolate the last concatenated name if this gets shared resources at From 4b26977ca95107be82d92cae1da6cb793f2cc10d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Thu, 25 Jul 2024 13:34:11 +0300 Subject: [PATCH 07/12] camera: Add max view scale for conservative view radii --- res/shader/scene/camera.glsl | 1 + src/scene/Camera.cpp | 6 ++++++ src/scene/Camera.hpp | 2 ++ 3 files changed, 9 insertions(+) diff --git a/res/shader/scene/camera.glsl b/res/shader/scene/camera.glsl index abccafc0..acb17400 100644 --- a/res/shader/scene/camera.glsl +++ b/res/shader/scene/camera.glsl @@ -23,6 +23,7 @@ layout(std430, set = CAMERA_SET, binding = 0) buffer CameraDSB vec2 previousJitter; float near; float far; + float maxViewScale; } camera; diff --git a/src/scene/Camera.cpp b/src/scene/Camera.cpp index dc7740f6..f9440719 100644 --- a/src/scene/Camera.cpp +++ b/src/scene/Camera.cpp @@ -197,6 +197,7 @@ void Camera::updateBuffer(const wheels::Optional &debugFrustum) .previousJitter = m_previousJitter, .near = m_parameters.zN, .far = m_parameters.zF, + .maxViewScale = m_maxViewScale, }; m_parametersByteOffset = m_constantsRing->write_value(uniforms); } @@ -379,6 +380,11 @@ void Camera::updateWorldToCamera() -dot(right, eye), -dot(newUp, eye), -dot(z, eye), 1.f}; m_cameraToWorld = inverse(m_worldToCamera); + const vec3 scale{ + length(column(m_worldToCamera, 0)), length(column(m_worldToCamera, 1)), + length(column(m_worldToCamera, 2))}; + m_maxViewScale = max(max(scale.x, scale.y), scale.z); + m_clipToCamera = inverse(m_cameraToClip); m_clipToWorld = inverse(m_cameraToClip * m_worldToCamera); diff --git a/src/scene/Camera.hpp b/src/scene/Camera.hpp index eb0567f7..a3d44d67 100644 --- a/src/scene/Camera.hpp +++ b/src/scene/Camera.hpp @@ -64,6 +64,7 @@ struct CameraUniforms glm::vec2 previousJitter; float near; float far; + float 
maxViewScale; }; struct FrustumCorners @@ -153,6 +154,7 @@ class Camera glm::vec4 m_rightPlane{0.f}; glm::vec4 m_topPlane{0.f}; glm::vec4 m_bottomPlane{0.f}; + float m_maxViewScale{1.f}; wheels::Optional m_bindingsReflection; vk::DescriptorSetLayout m_descriptorSetLayout; From c42047bf0b1c13b1b69e9ea7742e50f1a1d1f8c6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Fri, 26 Jul 2024 08:12:13 +0300 Subject: [PATCH 08/12] RenderResources: Add a nearest sampler with a black float border color --- src/render/RenderResources.cpp | 65 +++++++++++++++++++++------------- src/render/RenderResources.hpp | 1 + 2 files changed, 42 insertions(+), 24 deletions(-) diff --git a/src/render/RenderResources.cpp b/src/render/RenderResources.cpp index 5b16caba..13881d14 100644 --- a/src/render/RenderResources.cpp +++ b/src/render/RenderResources.cpp @@ -23,6 +23,20 @@ void RenderResources::init() this->texelBuffers = OwningPtr(gAllocators.general); + this->nearestBorderBlackFloatSampler = + gDevice.logical().createSampler(vk::SamplerCreateInfo{ + .magFilter = vk::Filter::eNearest, + .minFilter = vk::Filter::eNearest, + .mipmapMode = vk::SamplerMipmapMode::eNearest, + .addressModeU = vk::SamplerAddressMode::eClampToBorder, + .addressModeV = vk::SamplerAddressMode::eClampToBorder, + .addressModeW = vk::SamplerAddressMode::eClampToBorder, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 1, + .minLod = 0, + .maxLod = VK_LOD_CLAMP_NONE, + .borderColor = vk::BorderColor::eFloatOpaqueBlack, + }); this->nearestSampler = gDevice.logical().createSampler(vk::SamplerCreateInfo{ .magFilter = vk::Filter::eNearest, @@ -36,30 +50,32 @@ void RenderResources::init() .minLod = 0, .maxLod = VK_LOD_CLAMP_NONE, }); - bilinearSampler = gDevice.logical().createSampler(vk::SamplerCreateInfo{ - .magFilter = vk::Filter::eLinear, - .minFilter = vk::Filter::eLinear, - .mipmapMode = vk::SamplerMipmapMode::eNearest, - .addressModeU = vk::SamplerAddressMode::eClampToEdge, - .addressModeV = 
vk::SamplerAddressMode::eClampToEdge, - .addressModeW = vk::SamplerAddressMode::eClampToEdge, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 1, - .minLod = 0, - .maxLod = VK_LOD_CLAMP_NONE, - }); - trilinearSampler = gDevice.logical().createSampler(vk::SamplerCreateInfo{ - .magFilter = vk::Filter::eLinear, - .minFilter = vk::Filter::eLinear, - .mipmapMode = vk::SamplerMipmapMode::eLinear, - .addressModeU = vk::SamplerAddressMode::eClampToEdge, - .addressModeV = vk::SamplerAddressMode::eClampToEdge, - .addressModeW = vk::SamplerAddressMode::eClampToEdge, - .anisotropyEnable = VK_FALSE, - .maxAnisotropy = 1, - .minLod = 0, - .maxLod = VK_LOD_CLAMP_NONE, - }); + this->bilinearSampler = + gDevice.logical().createSampler(vk::SamplerCreateInfo{ + .magFilter = vk::Filter::eLinear, + .minFilter = vk::Filter::eLinear, + .mipmapMode = vk::SamplerMipmapMode::eNearest, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + .addressModeW = vk::SamplerAddressMode::eClampToEdge, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 1, + .minLod = 0, + .maxLod = VK_LOD_CLAMP_NONE, + }); + this->trilinearSampler = + gDevice.logical().createSampler(vk::SamplerCreateInfo{ + .magFilter = vk::Filter::eLinear, + .minFilter = vk::Filter::eLinear, + .mipmapMode = vk::SamplerMipmapMode::eLinear, + .addressModeU = vk::SamplerAddressMode::eClampToEdge, + .addressModeV = vk::SamplerAddressMode::eClampToEdge, + .addressModeW = vk::SamplerAddressMode::eClampToEdge, + .anisotropyEnable = VK_FALSE, + .maxAnisotropy = 1, + .minLod = 0, + .maxLod = VK_LOD_CLAMP_NONE, + }); m_initialized = true; } @@ -68,6 +84,7 @@ void RenderResources::destroy() { // Don't check for m_initialized as we might be cleaning up after a failed // init. 
+ gDevice.logical().destroy(nearestBorderBlackFloatSampler); gDevice.logical().destroy(nearestSampler); gDevice.logical().destroy(bilinearSampler); gDevice.logical().destroy(trilinearSampler); diff --git a/src/render/RenderResources.hpp b/src/render/RenderResources.hpp index 61f97712..e3fb7c44 100644 --- a/src/render/RenderResources.hpp +++ b/src/render/RenderResources.hpp @@ -56,6 +56,7 @@ class RenderResources wheels::OwningPtr texelBuffers; wheels::OwningPtr buffers; + vk::Sampler nearestBorderBlackFloatSampler; vk::Sampler nearestSampler; vk::Sampler bilinearSampler; vk::Sampler trilinearSampler; From d228f300e809bf850fca06abf806457fd5054845 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Sun, 14 Jul 2024 17:21:25 +0300 Subject: [PATCH 09/12] Add naive hierarchical culling to opaque draws Isn't conservative for disocclusion yet. That needs the two-pass setup from Aaltonen. --- res/shader/draw_list_culler.comp | 107 +++++++++++++++++++++++++++++++ src/render/ForwardRenderer.cpp | 13 ++-- src/render/ForwardRenderer.hpp | 13 ++-- src/render/GBufferRenderer.cpp | 7 +- src/render/GBufferRenderer.hpp | 6 +- src/render/MeshletCuller.cpp | 81 ++++++++++++++++++++++- src/render/MeshletCuller.hpp | 2 + src/render/Renderer.cpp | 33 +++++++++- src/render/Renderer.hpp | 3 + 9 files changed, 243 insertions(+), 22 deletions(-) diff --git a/res/shader/draw_list_culler.comp b/res/shader/draw_list_culler.comp index f1f2a51d..7fb8ae25 100644 --- a/res/shader/draw_list_culler.comp +++ b/res/shader/draw_list_culler.comp @@ -5,6 +5,7 @@ #extension GL_EXT_shader_8bit_storage : require #extension GL_KHR_shader_subgroup_basic : require #extension GL_KHR_shader_subgroup_ballot : require +#extension GL_EXT_shader_image_load_formatted : require #include "scene/camera.glsl" #include "scene/geometry.glsl" @@ -37,6 +38,18 @@ layout(std430, set = STORAGE_SET, binding = 2) buffer DispatchArguments } outDispatchArguments; +layout(set = STORAGE_SET, binding = 3) 
uniform texture2D + inHierarchicalDepth[MAX_HIZ_MIPS]; +// This should clamp to 1 on/beyond edges +layout(set = STORAGE_SET, binding = 4) uniform sampler depthSampler; + +layout(push_constant) uniform DrawListCullerPC +{ + // 0 means no hiz bound + uint hizMipCount; +} +PC; + float signedDistance(vec4 plane, vec3 p) { return dot(plane, vec4(p, 1)); } void transformBounds(inout MeshletBounds bounds, Transforms trfn, float scale) @@ -57,6 +70,98 @@ bool isSphereOutsideFrustum(MeshletBounds bounds) signedDistance(camera.topPlane, bounds.center) < -bounds.radius; } +// From https://zeux.io/2023/01/12/approximate-projected-bounds/ +// based on +// 2D Polyhedral Bounds of a Clipped, Perspective-Projected 3D Sphere. Michael +// Mara, Morgan McGuire. 2013 +// Assumes c,r are in view space and that the projection is symmetrical +bool projectSphereView( + vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) +{ + if (c.z < r + znear) + return false; + + vec3 cr = c * r; + float czr2 = c.z * c.z - r * r; + + float vx = sqrt(c.x * c.x + czr2); + float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x); + float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x); + + float vy = sqrt(c.y * c.y + czr2); + float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y); + float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y); + + aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11); + // clip space -> uv space + aabb = aabb.xwzy * vec4(0.5f, -0.5f, 0.5f, -0.5f) + vec4(0.5f); + + return true; +} + +bool isSphereOccluded(MeshletBounds bounds) +{ + if (PC.hizMipCount == 0) + return false; + + vec4 centerInView = camera.worldToCamera * vec4(bounds.center, 1); + + // Figure out bounds radius in texels, this also early outs if the cam is + // inside the bounds, or if the bounds are behind the camera + vec4 aabbScreen; + float conservativeRadius = bounds.radius * camera.maxViewScale; + if (!projectSphereView( + vec3(centerInView.xy, -centerInView.z), conservativeRadius, + camera.near, 
camera.cameraToClip[0][0], camera.cameraToClip[1][1], + aabbScreen)) + return false; + vec2 aabbDiagonalPx = aabbScreen.zw - aabbScreen.xy; + aabbDiagonalPx *= camera.resolution; + float pxRadius = length(aabbDiagonalPx); + + // Sample from the first mip where the whole sphere will fit a 2x2 texel + // area. floor without + 1 as hiz mip 0 is depth mip 1. clamp to 0 as < 1 + // radii would be negative. + uint hizMip = uint(max(floor(log2(pxRadius)), 0)); + if (hizMip >= PC.hizMipCount) + return false; + + // Figure out what uv to sample hiz from + vec4 centerClipPos = camera.cameraToClip * centerInView; + centerClipPos.xyz /= centerClipPos.w; + + vec2 uv = centerClipPos.xy * .5 + .5; + // Pick the closest 2x2 set of texels around the sample for gather + uv *= camera.resolution; + uv -= .5; + uv = floor(uv); + uv /= camera.resolution; + // Sampler should clamp to a border of 1 so that out of bounds samples don't + // get incorrectly culled + + // Figure out the closest depth on the bounds for conservative culling + // TODO: + // We're only interested in z and w, so xy are extra math here + vec3 viewWorldDir = normalize(camera.eye.xyz - bounds.center); + vec3 closestWorldPos = bounds.center + viewWorldDir * bounds.radius; + vec4 closestClipPos = + camera.cameraToClip * camera.worldToCamera * vec4(closestWorldPos, 1); + float closestDepth = closestClipPos.z / closestClipPos.w; + + // Gather the neighborhood around the sample point + // Let's not worry about the cases when the whole bounds are guaranteed to + // fit in one px. Sub-pixel meshlets are a bad time for perf regardless. 
+ vec4 hizDepths = textureGather( + sampler2D(inHierarchicalDepth[nonuniformEXT(hizMip)], depthSampler), uv, 0); + + // Reverse-Z so furthest away point is the smallest depth value and we know + // the cluster is occluded if its depth value is smaller than the depth + // buffer value + float hizDepth = + min(min(hizDepths.x, hizDepths.y), min(hizDepths.z, hizDepths.w)); + return closestDepth < hizDepth; +} + bool isConeCapHidden(MeshletBounds bounds) { // From meshoptimizer.h @@ -101,6 +206,8 @@ void main() meshletVisible = !isSphereOutsideFrustum(bounds); if (meshletVisible) meshletVisible = !isConeCapHidden(bounds); + if (meshletVisible) + meshletVisible = !isSphereOccluded(bounds); } // Figure out the subgroup offset for writes diff --git a/src/render/ForwardRenderer.cpp b/src/render/ForwardRenderer.cpp index 38a18784..2ab5b3ed 100644 --- a/src/render/ForwardRenderer.cpp +++ b/src/render/ForwardRenderer.cpp @@ -102,8 +102,8 @@ ForwardRenderer::OpaqueOutput ForwardRenderer::recordOpaque( ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, const vk::Rect2D &renderArea, const LightClusteringOutput &lightClusters, - BufferHandle inOutDrawStats, uint32_t nextFrame, bool applyIbl, - DrawType drawType, DrawStats *drawStats) + Optional inHierarchicalDepth, BufferHandle inOutDrawStats, + uint32_t nextFrame, bool applyIbl, DrawType drawType, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); @@ -119,7 +119,7 @@ ForwardRenderer::OpaqueOutput ForwardRenderer::recordOpaque( .velocity = ret.velocity, .depth = ret.depth, }, - lightClusters, inOutDrawStats, + lightClusters, inHierarchicalDepth, inOutDrawStats, Options{ .ibl = applyIbl, .drawType = drawType, @@ -144,7 +144,7 @@ void ForwardRenderer::recordTransparent( .illumination = inOutTargets.illumination, .depth = inOutTargets.depth, }, - lightClusters, inOutDrawStats, + lightClusters, {}, inOutDrawStats, Options{ .transparents = true, .drawType = drawType, 
@@ -384,7 +384,8 @@ void ForwardRenderer::record( ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, const uint32_t nextFrame, const RecordInOut &inOutTargets, - const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, + const LightClusteringOutput &lightClusters, + Optional inHierarchicalDepth, BufferHandle inOutDrawStats, const Options &options, DrawStats *drawStats, const char *debugName) { WHEELS_ASSERT(meshletCuller != nullptr); @@ -403,7 +404,7 @@ void ForwardRenderer::record( options.transparents ? "Transparent" : "Opaque"; const MeshletCullerOutput cullerOutput = meshletCuller->record( scopeAlloc.child_scope(), cb, cullerMode, world, cam, nextFrame, - cullerDebugPrefix, drawStats); + inHierarchicalDepth, cullerDebugPrefix, drawStats); updateDescriptorSet( scopeAlloc.child_scope(), nextFrame, options.transparents, cullerOutput, diff --git a/src/render/ForwardRenderer.hpp b/src/render/ForwardRenderer.hpp index 5202b950..aa9303e7 100644 --- a/src/render/ForwardRenderer.hpp +++ b/src/render/ForwardRenderer.hpp @@ -48,9 +48,10 @@ class ForwardRenderer wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, const vk::Rect2D &renderArea, - const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, - uint32_t nextFrame, bool applyIbl, DrawType drawType, - DrawStats *drawStats); + const LightClusteringOutput &lightClusters, + wheels::Optional inHierarchicalDepth, + BufferHandle inOutDrawStats, uint32_t nextFrame, bool applyIbl, + DrawType drawType, DrawStats *drawStats); struct TransparentInOut { @@ -93,8 +94,10 @@ class ForwardRenderer wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, uint32_t nextFrame, const RecordInOut &inOutTargets, - const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, - const Options 
&options, DrawStats *drawStats, const char *debugName); + const LightClusteringOutput &lightClusters, + wheels::Optional inHierarchicalDepth, + BufferHandle inOutDrawStats, const Options &options, + DrawStats *drawStats, const char *debugName); struct Attachments { diff --git a/src/render/GBufferRenderer.cpp b/src/render/GBufferRenderer.cpp index af0ea2a8..fdeb45af 100644 --- a/src/render/GBufferRenderer.cpp +++ b/src/render/GBufferRenderer.cpp @@ -106,8 +106,9 @@ void GBufferRenderer::recompileShaders( GBufferRendererOutput GBufferRenderer::record( ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const vk::Rect2D &renderArea, BufferHandle inOutDrawStats, - DrawType drawType, const uint32_t nextFrame, DrawStats *drawStats) + const vk::Rect2D &renderArea, Optional inHierarchicalDepth, + BufferHandle inOutDrawStats, DrawType drawType, const uint32_t nextFrame, + DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); WHEELS_ASSERT(meshletCuller != nullptr); @@ -146,7 +147,7 @@ GBufferRendererOutput GBufferRenderer::record( const MeshletCullerOutput cullerOutput = meshletCuller->record( scopeAlloc.child_scope(), cb, MeshletCuller::Mode::Opaque, world, - cam, nextFrame, "GBuffer", drawStats); + cam, nextFrame, inHierarchicalDepth, "GBuffer", drawStats); updateDescriptorSet( scopeAlloc.child_scope(), nextFrame, cullerOutput, inOutDrawStats); diff --git a/src/render/GBufferRenderer.hpp b/src/render/GBufferRenderer.hpp index 98ec19e6..6955e4d4 100644 --- a/src/render/GBufferRenderer.hpp +++ b/src/render/GBufferRenderer.hpp @@ -44,8 +44,10 @@ class GBufferRenderer [[nodiscard]] GBufferRendererOutput record( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const vk::Rect2D &renderArea, BufferHandle inOutDrawStats, - DrawType drawType, uint32_t nextFrame, DrawStats *drawStats); + const vk::Rect2D &renderArea, + wheels::Optional 
inHierarchicalDepth, + BufferHandle inOutDrawStats, DrawType drawType, uint32_t nextFrame, + DrawStats *drawStats); private: [[nodiscard]] bool compileShaders( diff --git a/src/render/MeshletCuller.cpp b/src/render/MeshletCuller.cpp index 1b15a7eb..dd348d6e 100644 --- a/src/render/MeshletCuller.cpp +++ b/src/render/MeshletCuller.cpp @@ -26,6 +26,8 @@ const uint32_t sCullerGroupSize = 64; // lot based on content const uint32_t sMaxRecordsPerFrame = 2; +const uint32_t sMaxHierarchicalDepthMips = 12; + enum GeneratorBindingSet : uint32_t { GeneratorGeometryBindingSet, @@ -41,6 +43,12 @@ struct GeneratorPCBlock uint matchTransparents; }; +struct CullerPCBlock +{ + // 0 means no hiz bound + uint hizMipCount{0}; +}; + enum CullerBindingSet : uint32_t { CullerCameraBindingSet, @@ -106,13 +114,14 @@ ComputePass::Shader argumentsWriterDefinitionCallback(Allocator &alloc) ComputePass::Shader cullerDefinitionCallback(Allocator &alloc) { - const size_t len = 96; + const size_t len = 120; String defines{alloc, len}; appendDefineStr(defines, "CAMERA_SET", CullerCameraBindingSet); appendDefineStr(defines, "GEOMETRY_SET", CullerGeometryBindingSet); appendDefineStr( defines, "SCENE_INSTANCES_SET", CullerSceneInstancesBindingSet); appendDefineStr(defines, "STORAGE_SET", CullerStorageBindingSet); + appendDefineStr(defines, "MAX_HIZ_MIPS", sMaxHierarchicalDepthMips); WHEELS_ASSERT(defines.size() <= len); return ComputePass::Shader{ @@ -201,7 +210,8 @@ void MeshletCuller::startFrame() MeshletCullerOutput MeshletCuller::record( ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, const Camera &cam, uint32_t nextFrame, - const char *debugPrefix, DrawStats *drawStats) + Optional inHierarchicalDepth, const char *debugPrefix, + DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); @@ -223,6 +233,7 @@ MeshletCullerOutput MeshletCuller::record( CullerInput{ .dataBuffer = initialList, .argumentBuffer = cullerArgs, + .hierarchicalDepth = inHierarchicalDepth, }, 
debugPrefix); @@ -412,6 +423,52 @@ MeshletCullerOutput MeshletCuller::recordCullList( argumentsName.extend(debugPrefix); argumentsName.extend("MeshDiscpatchArguments"); + ImageHandle dummyHierarchicalDepth; + if (!input.hierarchicalDepth.has_value()) + { + String dummyHizName{scopeAlloc}; + dummyHizName.extend(debugPrefix); + dummyHizName.extend("DummyHiZ"); + + dummyHierarchicalDepth = gRenderResources.images->create( + ImageDescription{ + .format = vk::Format::eR32Sfloat, + .width = 1, + .height = 1, + .mipCount = 1, + .usageFlags = vk::ImageUsageFlagBits::eSampled, + }, + dummyHizName.c_str()); + } + // TODO: + // Just enable null binds instead of binding dummies? + const ImageHandle hierarchicalDepth = input.hierarchicalDepth.has_value() + ? *input.hierarchicalDepth + : dummyHierarchicalDepth; + + const Span hierarchicalDepthViews = + gRenderResources.images->subresourceViews(hierarchicalDepth); + + StaticArray + hierarchicalDepthInfos; + { + size_t i = 0; + for (; i < hierarchicalDepthViews.size(); ++i) + hierarchicalDepthInfos[i] = vk::DescriptorImageInfo{ + .imageView = hierarchicalDepthViews[i], + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + }; + // Fill the remaining descriptors with copies of the first one so we + // won't have unbound descriptors. We could use VK_EXT_robustness2 and + // null descriptors, but this seems like less of a hassle since we + // shouldn't be accessing them anyway. 
+ for (; i < sMaxHierarchicalDepthMips; ++i) + hierarchicalDepthInfos[i] = vk::DescriptorImageInfo{ + .imageView = hierarchicalDepthViews[0], + .imageLayout = vk::ImageLayout::eShaderReadOnlyOptimal, + }; + } + const vk::DeviceSize drawListByteSize = gRenderResources.buffers->resource(input.dataBuffer).byteSize; const MeshletCullerOutput ret{ @@ -451,6 +508,10 @@ MeshletCullerOutput MeshletCuller::recordCullList( gRenderResources.buffers->nativeHandle(ret.argumentBuffer), .range = VK_WHOLE_SIZE, }}, + DescriptorInfo{hierarchicalDepthInfos}, + DescriptorInfo{vk::DescriptorImageInfo{ + .sampler = gRenderResources.nearestBorderBlackFloatSampler, + }}, }}); gRenderResources.buffers->transition( @@ -464,6 +525,9 @@ MeshletCullerOutput MeshletCuller::recordCullList( transition( WHEELS_MOV(scopeAlloc), cb, Transitions{ + .images = StaticArray{{ + {hierarchicalDepth, ImageState::ComputeShaderSampledRead}, + }}, .buffers = StaticArray{{ {input.dataBuffer, BufferState::ComputeShaderRead}, {input.argumentBuffer, BufferState::DrawIndirectRead}, @@ -492,10 +556,21 @@ MeshletCullerOutput MeshletCuller::recordCullList( worldByteOffsets.modelInstanceScales, }}; + const CullerPCBlock pcBlock{ + .hizMipCount = + input.hierarchicalDepth.has_value() + ? 
gRenderResources.images->resource(*input.hierarchicalDepth) + .mipCount + : 0, + }; + const vk::Buffer argumentsHandle = gRenderResources.buffers->nativeHandle(input.argumentBuffer); m_drawListCuller.record( - cb, argumentsHandle, descriptorSets, dynamicOffsets); + cb, pcBlock, argumentsHandle, descriptorSets, dynamicOffsets); + + if (gRenderResources.images->isValidHandle(dummyHierarchicalDepth)) + gRenderResources.images->release(dummyHierarchicalDepth); return ret; } diff --git a/src/render/MeshletCuller.hpp b/src/render/MeshletCuller.hpp index dce5cc24..0bad6362 100644 --- a/src/render/MeshletCuller.hpp +++ b/src/render/MeshletCuller.hpp @@ -52,6 +52,7 @@ class MeshletCuller [[nodiscard]] MeshletCullerOutput record( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, const Camera &cam, uint32_t nextFrame, + wheels::Optional inHierarchicalDepth, const char *debugPrefix, DrawStats *drawStats); private: @@ -68,6 +69,7 @@ class MeshletCuller { BufferHandle dataBuffer; BufferHandle argumentBuffer; + wheels::Optional hierarchicalDepth; }; [[nodiscard]] MeshletCullerOutput recordCullList( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, diff --git a/src/render/Renderer.cpp b/src/render/Renderer.cpp index 4e6c777b..4297ec32 100644 --- a/src/render/Renderer.cpp +++ b/src/render/Renderer.cpp @@ -13,6 +13,7 @@ #include "DeferredShading.hpp" #include "ForwardRenderer.hpp" #include "GBufferRenderer.hpp" +#include "HierarchicalDepthDownsampler.hpp" #include "ImGuiRenderer.hpp" #include "ImageBasedLighting.hpp" #include "LightClustering.hpp" @@ -146,6 +147,7 @@ Renderer::Renderer() noexcept , m_temporalAntiAliasing{OwningPtr{gAllocators.general}} , m_meshletCuller{OwningPtr{gAllocators.general}} , m_textureReadback{OwningPtr{gAllocators.general}} +, m_hizDownsampler{OwningPtr{gAllocators.general}} { } @@ -191,6 +193,7 @@ void Renderer::init( m_meshletCuller->init( scopeAlloc.child_scope(), worldDsLayouts, camDsLayout); 
m_textureReadback->init(scopeAlloc.child_scope()); + m_hizDownsampler->init(scopeAlloc.child_scope()); LOG_INFO("GPU pass init took %.2fs", gpuPassesInitTimer.getSeconds()); } @@ -252,6 +255,7 @@ void Renderer::recompileShaders( scopeAlloc.child_scope(), changedFiles, camDsLayout); m_meshletCuller->recompileShaders( scopeAlloc.child_scope(), changedFiles, worldDsLayouts, camDsLayout); + m_hizDownsampler->recompileShaders(scopeAlloc.child_scope(), changedFiles); LOG_INFO("Shaders recompiled in %.2fs", t.getSeconds()); } @@ -397,9 +401,20 @@ void Renderer::render( // Opaque if (m_renderDeferred) { + Optional prevHierarchicalDepth; + if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) + prevHierarchicalDepth = m_prevHierarchicalDepth; + const GBufferRendererOutput gbuffer = m_gbufferRenderer->record( scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, cam, - renderArea, gpuDrawStats, m_drawType, nextFrame, &drawStats); + renderArea, prevHierarchicalDepth, gpuDrawStats, m_drawType, + nextFrame, &drawStats); + + if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) + gRenderResources.images->release(m_prevHierarchicalDepth); + m_prevHierarchicalDepth = m_hizDownsampler->record( + scopeAlloc.child_scope(), cb, gbuffer.depth, nextFrame); + gRenderResources.images->preserve(m_prevHierarchicalDepth); if (m_deferredRt) illumination = @@ -433,11 +448,23 @@ void Renderer::render( { m_rtDirectIllumination->releasePreserved(); + Optional prevHierarchicalDepth; + if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) + prevHierarchicalDepth = m_prevHierarchicalDepth; + const ForwardRenderer::OpaqueOutput output = m_forwardRenderer->recordOpaque( scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, - cam, renderArea, lightClusters, gpuDrawStats, nextFrame, - m_applyIbl, m_drawType, &drawStats); + cam, renderArea, lightClusters, prevHierarchicalDepth, + gpuDrawStats, nextFrame, m_applyIbl, m_drawType, + &drawStats); + + 
if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) + gRenderResources.images->release(m_prevHierarchicalDepth); + m_prevHierarchicalDepth = m_hizDownsampler->record( + scopeAlloc.child_scope(), cb, output.depth, nextFrame); + gRenderResources.images->preserve(m_prevHierarchicalDepth); + illumination = output.illumination; velocity = output.velocity; depth = output.depth; diff --git a/src/render/Renderer.hpp b/src/render/Renderer.hpp index ac2a34c8..7385e25a 100644 --- a/src/render/Renderer.hpp +++ b/src/render/Renderer.hpp @@ -90,10 +90,13 @@ class Renderer wheels::OwningPtr m_temporalAntiAliasing; wheels::OwningPtr m_meshletCuller; wheels::OwningPtr m_textureReadback; + wheels::OwningPtr m_hizDownsampler; wheels::StaticArray m_drawStats; wheels::StaticArray m_gpuDrawStats; + ImageHandle m_prevHierarchicalDepth; + vk::Extent2D m_viewportExtentInUi{}; bool m_textureDebugActive{false}; From 11d0713b6f81726771ae4a37548a19445e0df30b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Fri, 26 Jul 2024 11:16:39 +0300 Subject: [PATCH 10/12] Add in a second culling phase to account for disocclusion Also move meshlet culler as an init input to the geometry renderers because the same instance is always passed in. Similarly, have the geometry renderers own hierarchical depth history to keep it contained where it is needed. 
--- readme.md | 2 + res/shader/draw_list_culler.comp | 83 ++-- src/render/ForwardRenderer.cpp | 306 ++++++++------ src/render/ForwardRenderer.hpp | 67 ++-- src/render/Fwd.hpp | 4 +- src/render/GBufferRenderer.cpp | 416 ++++++++++++-------- src/render/GBufferRenderer.hpp | 43 +- src/render/HierarchicalDepthDownsampler.cpp | 26 +- src/render/HierarchicalDepthDownsampler.hpp | 6 +- src/render/MeshletCuller.cpp | 189 +++++++-- src/render/MeshletCuller.hpp | 43 +- src/render/Renderer.cpp | 59 ++- src/render/Renderer.hpp | 7 +- 13 files changed, 829 insertions(+), 422 deletions(-) diff --git a/readme.md b/readme.md index 71f17e5c..80cf5096 100644 --- a/readme.md +++ b/readme.md @@ -23,6 +23,8 @@ Vulkan renderer spun off from following https://vulkan-tutorial.com/. Work of [S - Mesh shaders - Drawlist generation and meshlet culling in compute - Not all HW supports task shaders so let's have a unified implementation + - Hierarchical depth culling without explicit blocker geometry + - Based on Aaltonen's work in [GPU-Driven Rendering Pipelines](https://www.advances.realtimerendering.com/s2015/aaltonenhaar_siggraph2015_combined_final_footer_220dpi.pdf) - ReSTIR DI in deferred path - Initial candidate sampling and biased spatial reuse implemented so far - Path tracing reference diff --git a/res/shader/draw_list_culler.comp b/res/shader/draw_list_culler.comp index 7fb8ae25..5c038756 100644 --- a/res/shader/draw_list_culler.comp +++ b/res/shader/draw_list_culler.comp @@ -17,20 +17,21 @@ struct DrawMeshletInstance uint drawInstanceID; uint meshletID; }; + layout(std430, set = STORAGE_SET, binding = 0) readonly buffer InDrawList { uint count; DrawMeshletInstance instance[]; } inDrawList; -layout(std430, set = STORAGE_SET, binding = 1) writeonly buffer OutDrawList + +layout(std430, set = STORAGE_SET, binding = 1) buffer OutDrawList { uint count; DrawMeshletInstance instance[]; } outDrawList; - -layout(std430, set = STORAGE_SET, binding = 2) buffer DispatchArguments +layout(std430, 
set = STORAGE_SET, binding = 2) buffer OutDispatchArguments { uint groupsX; uint groupsY; @@ -38,15 +39,23 @@ layout(std430, set = STORAGE_SET, binding = 2) buffer DispatchArguments } outDispatchArguments; -layout(set = STORAGE_SET, binding = 3) uniform texture2D +layout(std430, set = STORAGE_SET, binding = 3) buffer OutSecondPhaseDrawList +{ + uint count; + DrawMeshletInstance instance[]; +} +outSecondPhaseDrawList; + +layout(set = STORAGE_SET, binding = 4) uniform texture2D inHierarchicalDepth[MAX_HIZ_MIPS]; // This should clamp to 1 on/beyond edges -layout(set = STORAGE_SET, binding = 4) uniform sampler depthSampler; +layout(set = STORAGE_SET, binding = 5) uniform sampler depthSampler; layout(push_constant) uniform DrawListCullerPC { // 0 means no hiz bound uint hizMipCount; + uint outputSecondPhaseInput; } PC; @@ -152,7 +161,8 @@ bool isSphereOccluded(MeshletBounds bounds) // Let's not worry about the cases when the whole bounds are guaranteed to // fit in one px. Sub-pixel meshlets are a bad time for perf regardless. vec4 hizDepths = textureGather( - sampler2D(inHierarchicalDepth[nonuniformEXT(hizMip)], depthSampler), uv, 0); + sampler2D(inHierarchicalDepth[nonuniformEXT(hizMip)], depthSampler), uv, + 0); // Reverse-Z so furthest away point is the smallest depth value and we know // the cluster is occluded if its depth value is smaller than the depth @@ -174,6 +184,8 @@ layout(local_size_x = GROUP_X) in; void main() { // These were zeroed before the pass to init X for use as the write pointer + // without barriers after this init that now just sets the other group + // counts to 1 if (gl_GlobalInvocationID.x == 0) { outDispatchArguments.groupsY = 1; @@ -196,6 +208,7 @@ void main() float scale = modelInstanceScales.instance[instance.modelInstanceID]; bool meshletVisible = true; + bool meshletOccluded = false; if (scale != 0.) 
{ MeshletBounds bounds = @@ -207,25 +220,51 @@ void main() if (meshletVisible) meshletVisible = !isConeCapHidden(bounds); if (meshletVisible) - meshletVisible = !isSphereOccluded(bounds); + { + meshletOccluded = isSphereOccluded(bounds); + meshletVisible = !meshletOccluded; + } } - // Figure out the subgroup offset for writes - uvec4 visibleMeshletsMask = subgroupBallot(meshletVisible); - uint subgroupMeshletCount = subgroupBallotBitCount(visibleMeshletsMask); - uint subgroupStartOffset; - if (subgroupElect()) { - // Keep count in the buffer in sync for consistency - atomicAdd(outDrawList.count, subgroupMeshletCount); - subgroupStartOffset = - atomicAdd(outDispatchArguments.groupsX, subgroupMeshletCount); + // Figure out the subgroup offset for writes + uvec4 visibleMeshletsMask = subgroupBallot(meshletVisible); + uint subgroupMeshletCount = subgroupBallotBitCount(visibleMeshletsMask); + uint subgroupStartOffset; + if (subgroupElect()) + { + // Keep count in the buffer in sync for consistency + atomicAdd(outDrawList.count, subgroupMeshletCount); + subgroupStartOffset = + atomicAdd(outDispatchArguments.groupsX, subgroupMeshletCount); + } + subgroupStartOffset = subgroupBroadcastFirst(subgroupStartOffset); + + // Write out within the subgroup block + uint threadOffset = + subgroupBallotExclusiveBitCount(visibleMeshletsMask); + if (meshletVisible) + outDrawList.instance[subgroupStartOffset + threadOffset] = + meshletInstance; } - subgroupStartOffset = subgroupBroadcastFirst(subgroupStartOffset); - // Write out within the subgroup block - uint threadOffset = subgroupBallotExclusiveBitCount(visibleMeshletsMask); - if (meshletVisible) - outDrawList.instance[subgroupStartOffset + threadOffset] = - meshletInstance; + if (PC.outputSecondPhaseInput == 1) + { + // Figure out the subgroup offset for writes + uvec4 occludedMeshletsMask = subgroupBallot(meshletOccluded); + uint subgroupMeshletCount = + subgroupBallotBitCount(occludedMeshletsMask); + uint subgroupStartOffset; + 
if (subgroupElect()) + subgroupStartOffset = + atomicAdd(outSecondPhaseDrawList.count, subgroupMeshletCount); + subgroupStartOffset = subgroupBroadcastFirst(subgroupStartOffset); + + // Write out within the subgroup block + uint threadOffset = + subgroupBallotExclusiveBitCount(occludedMeshletsMask); + if (meshletOccluded) + outSecondPhaseDrawList + .instance[subgroupStartOffset + threadOffset] = meshletInstance; + } } diff --git a/src/render/ForwardRenderer.cpp b/src/render/ForwardRenderer.cpp index 2ab5b3ed..dac8e727 100644 --- a/src/render/ForwardRenderer.cpp +++ b/src/render/ForwardRenderer.cpp @@ -15,6 +15,7 @@ #include "../utils/Profiler.hpp" #include "../utils/Utils.hpp" #include "DrawStats.hpp" +#include "HierarchicalDepthDownsampler.hpp" #include "LightClustering.hpp" #include "MeshletCuller.hpp" #include "RenderResources.hpp" @@ -48,6 +49,12 @@ struct PCBlock uint32_t previousTransformValid{0}; }; +struct Attachments +{ + InlineArray color; + vk::RenderingAttachmentInfo depth; +}; + } // namespace ForwardRenderer::~ForwardRenderer() @@ -63,9 +70,13 @@ ForwardRenderer::~ForwardRenderer() } void ForwardRenderer::init( - ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts) + ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts, + MeshletCuller *meshletCuller, + HierarchicalDepthDownsampler *hierarchicalDepthDownsampler) { WHEELS_ASSERT(!m_initialized); + WHEELS_ASSERT(meshletCuller != nullptr); + WHEELS_ASSERT(hierarchicalDepthDownsampler != nullptr); LOG_INFO("Creating ForwardRenderer"); @@ -75,6 +86,9 @@ void ForwardRenderer::init( createDescriptorSets(scopeAlloc.child_scope()); createGraphicsPipelines(dsLayouts); + m_meshletCuller = meshletCuller; + m_hierarchicalDepthDownsampler = hierarchicalDepthDownsampler; + m_initialized = true; } @@ -98,11 +112,12 @@ void ForwardRenderer::recompileShaders( } } +void ForwardRenderer::startFrame() { m_nextFrameRecord = 0; } + ForwardRenderer::OpaqueOutput ForwardRenderer::recordOpaque( - ScopedScratch 
scopeAlloc, vk::CommandBuffer cb, - MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const vk::Rect2D &renderArea, const LightClusteringOutput &lightClusters, - Optional inHierarchicalDepth, BufferHandle inOutDrawStats, + ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, + const Camera &cam, const vk::Rect2D &renderArea, + const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, uint32_t nextFrame, bool applyIbl, DrawType drawType, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); @@ -112,19 +127,94 @@ ForwardRenderer::OpaqueOutput ForwardRenderer::recordOpaque( ret.velocity = createVelocity(renderArea.extent, "velocity"); ret.depth = createDepth(renderArea.extent, "depth"); - record( - WHEELS_MOV(scopeAlloc), cb, meshletCuller, world, cam, nextFrame, + Optional prevHierarchicalDepth; + if (gRenderResources.images->isValidHandle(m_previousHierarchicalDepth)) + prevHierarchicalDepth = m_previousHierarchicalDepth; + + // Conservative two-phase culling from GPU-Driven Rendering Pipelines + // by Sebastian Aaltonen + + // First phase: + // Cull with previous frame hierarchical depth and draw. Store a second draw + // list with potential culling false positives: all meshlets that were + // culled based on depth. 
+ const MeshletCullerFirstPhaseOutput firstPhaseCullingOutput = + m_meshletCuller->recordFirstPhase( + scopeAlloc.child_scope(), cb, MeshletCuller::Mode::Opaque, world, + cam, nextFrame, prevHierarchicalDepth, "Opaque", drawStats); + + if (gRenderResources.images->isValidHandle(m_previousHierarchicalDepth)) + gRenderResources.images->release(m_previousHierarchicalDepth); + + recordDraw( + scopeAlloc.child_scope(), cb, world, cam, nextFrame, RecordInOut{ - .illumination = ret.illumination, - .velocity = ret.velocity, - .depth = ret.depth, + .inOutIllumination = ret.illumination, + .inOutVelocity = ret.velocity, + .inOutDepth = ret.depth, + .inOutDrawStats = inOutDrawStats, + .inDataBuffer = firstPhaseCullingOutput.dataBuffer, + .inArgumentBuffer = firstPhaseCullingOutput.argumentBuffer, }, - lightClusters, inHierarchicalDepth, inOutDrawStats, + lightClusters, Options{ .ibl = applyIbl, .drawType = drawType, }, - drawStats, "OpaqueGeometry"); + drawStats, "OpaqueGeometryFirstPhase"); + + gRenderResources.buffers->release(firstPhaseCullingOutput.dataBuffer); + gRenderResources.buffers->release(firstPhaseCullingOutput.argumentBuffer); + + if (firstPhaseCullingOutput.secondPhaseInput.has_value()) + { + // Second phase: + // Another pass over the meshelets that got culled by depth in the first + // pass, now with hierarchical depth built from the first pass result. + // This way we'll now draw any meshlets that got disoccluded in the + // curret frame. 
+ const ImageHandle currentHierarchicalDepth = + m_hierarchicalDepthDownsampler->record( + scopeAlloc.child_scope(), cb, ret.depth, nextFrame, + "OpaqueFirstPhase"); + + const MeshletCullerSecondPhaseOutput secondPhaseCullingOutput = + m_meshletCuller->recordSecondPhase( + scopeAlloc.child_scope(), cb, world, cam, nextFrame, + *firstPhaseCullingOutput.secondPhaseInput, + currentHierarchicalDepth, "Opaque"); + + gRenderResources.buffers->release( + *firstPhaseCullingOutput.secondPhaseInput); + gRenderResources.images->release(currentHierarchicalDepth); + + recordDraw( + scopeAlloc.child_scope(), cb, world, cam, nextFrame, + RecordInOut{ + .inOutIllumination = ret.illumination, + .inOutVelocity = ret.velocity, + .inOutDepth = ret.depth, + .inOutDrawStats = inOutDrawStats, + .inDataBuffer = secondPhaseCullingOutput.dataBuffer, + .inArgumentBuffer = secondPhaseCullingOutput.argumentBuffer, + }, + lightClusters, + Options{ + .ibl = applyIbl, + .secondPhase = true, + .drawType = drawType, + }, + drawStats, "OpaqueGeometrySecondPhase"); + + gRenderResources.buffers->release(secondPhaseCullingOutput.dataBuffer); + gRenderResources.buffers->release( + secondPhaseCullingOutput.argumentBuffer); + } + + // Potential previous pyramid was already freed during first phase + m_previousHierarchicalDepth = m_hierarchicalDepthDownsampler->record( + WHEELS_MOV(scopeAlloc), cb, ret.depth, nextFrame, "OpaqueSecondPhase"); + gRenderResources.images->preserve(m_previousHierarchicalDepth); return ret; } @@ -138,18 +228,36 @@ void ForwardRenderer::recordTransparent( { WHEELS_ASSERT(m_initialized); - record( - WHEELS_MOV(scopeAlloc), cb, meshletCuller, world, cam, nextFrame, + const MeshletCullerFirstPhaseOutput cullerOutput = + meshletCuller->recordFirstPhase( + scopeAlloc.child_scope(), cb, MeshletCuller::Mode::Transparent, + world, cam, nextFrame, {}, "Transparent", drawStats); + WHEELS_ASSERT(!cullerOutput.secondPhaseInput.has_value()); + + recordDraw( + WHEELS_MOV(scopeAlloc), cb, 
world, cam, nextFrame, RecordInOut{ - .illumination = inOutTargets.illumination, - .depth = inOutTargets.depth, + .inOutIllumination = inOutTargets.illumination, + .inOutDepth = inOutTargets.depth, + .inOutDrawStats = inOutDrawStats, + .inDataBuffer = cullerOutput.dataBuffer, + .inArgumentBuffer = cullerOutput.argumentBuffer, }, - lightClusters, {}, inOutDrawStats, + lightClusters, Options{ .transparents = true, .drawType = drawType, }, drawStats, "TransparentGeometry"); + + gRenderResources.buffers->release(cullerOutput.dataBuffer); + gRenderResources.buffers->release(cullerOutput.argumentBuffer); +} + +void ForwardRenderer::releasePreserved() +{ + if (gRenderResources.images->isValidHandle(m_previousHierarchicalDepth)) + gRenderResources.images->release(m_previousHierarchicalDepth); } bool ForwardRenderer::compileShaders( @@ -257,32 +365,26 @@ void ForwardRenderer::createDescriptorSets(ScopedScratch scopeAlloc) WHEELS_MOV(scopeAlloc), DrawStatsBindingSet, vk::ShaderStageFlagBits::eMeshEXT); - const StaticArray - layouts{m_meshSetLayout}; - const StaticArray debugNames{ + const StaticArray layouts{ + m_meshSetLayout}; + const StaticArray debugNames{ "ForwardMesh"}; gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_meshSets.mut_span()); } void ForwardRenderer::updateDescriptorSet( - ScopedScratch scopeAlloc, uint32_t nextFrame, bool transparents, - const MeshletCullerOutput &cullerOutput, BufferHandle inOutDrawStats) + ScopedScratch scopeAlloc, vk::DescriptorSet ds, + const DescriptorSetBuffers &buffers) const { - // TODO: - // Don't update if resources are the same as before (for this DS index)? - // Have to compare against both extent and previous native handle? - const vk::DescriptorSet ds = - m_meshSets[nextFrame * MAX_FRAMES_IN_FLIGHT + (transparents ? 
1u : 0u)]; - const StaticArray infos{{ DescriptorInfo{vk::DescriptorBufferInfo{ - .buffer = gRenderResources.buffers->nativeHandle(inOutDrawStats), + .buffer = gRenderResources.buffers->nativeHandle(buffers.drawStats), .range = VK_WHOLE_SIZE, }}, DescriptorInfo{vk::DescriptorBufferInfo{ .buffer = - gRenderResources.buffers->nativeHandle(cullerOutput.dataBuffer), + gRenderResources.buffers->nativeHandle(buffers.dataBuffer), .range = VK_WHOLE_SIZE, }}, }}; @@ -380,54 +482,50 @@ void ForwardRenderer::createGraphicsPipelines(const InputDSLayouts &dsLayouts) }); } } -void ForwardRenderer::record( - ScopedScratch scopeAlloc, vk::CommandBuffer cb, - MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const uint32_t nextFrame, const RecordInOut &inOutTargets, - const LightClusteringOutput &lightClusters, - Optional inHierarchicalDepth, BufferHandle inOutDrawStats, - const Options &options, DrawStats *drawStats, const char *debugName) +void ForwardRenderer::recordDraw( + ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, + const Camera &cam, uint32_t nextFrame, const RecordInOut &inputsOutputs, + const LightClusteringOutput &lightClusters, const Options &options, + DrawStats *drawStats, const char *debugName) { - WHEELS_ASSERT(meshletCuller != nullptr); WHEELS_ASSERT(drawStats != nullptr); PROFILER_CPU_SCOPE(debugName); - const vk::Rect2D renderArea = getRect2D(inOutTargets.illumination); + const vk::Rect2D renderArea = getRect2D(inputsOutputs.inOutIllumination); const size_t pipelineIndex = options.transparents ? 1 : 0; - const MeshletCuller::Mode cullerMode = - options.transparents ? MeshletCuller::Mode::Transparent - : MeshletCuller::Mode::Opaque; - const char *cullerDebugPrefix = - options.transparents ? 
"Transparent" : "Opaque"; - const MeshletCullerOutput cullerOutput = meshletCuller->record( - scopeAlloc.child_scope(), cb, cullerMode, world, cam, nextFrame, - inHierarchicalDepth, cullerDebugPrefix, drawStats); + const uint32_t dsIndex = + nextFrame * MAX_FRAMES_IN_FLIGHT * 2 + m_nextFrameRecord; + const vk::DescriptorSet ds = m_meshSets[dsIndex]; updateDescriptorSet( - scopeAlloc.child_scope(), nextFrame, options.transparents, cullerOutput, - inOutDrawStats); + scopeAlloc.child_scope(), ds, + DescriptorSetBuffers{ + .dataBuffer = inputsOutputs.inDataBuffer, + .drawStats = inputsOutputs.inOutDrawStats, + }); InlineArray images; images.emplace_back( - inOutTargets.illumination, ImageState::ColorAttachmentReadWrite); + inputsOutputs.inOutIllumination, ImageState::ColorAttachmentReadWrite); images.emplace_back( - inOutTargets.depth, ImageState::DepthAttachmentReadWrite); + inputsOutputs.inOutDepth, ImageState::DepthAttachmentReadWrite); images.emplace_back(lightClusters.pointers, ImageState::FragmentShaderRead); - if (inOutTargets.velocity.isValid()) + if (inputsOutputs.inOutVelocity.isValid()) images.emplace_back( - inOutTargets.velocity, ImageState::ColorAttachmentReadWrite); + inputsOutputs.inOutVelocity, ImageState::ColorAttachmentReadWrite); transition( WHEELS_MOV(scopeAlloc), cb, Transitions{ .images = images, .buffers = StaticArray{{ - {inOutDrawStats, BufferState::MeshShaderReadWrite}, - {cullerOutput.dataBuffer, BufferState::MeshShaderRead}, - {cullerOutput.argumentBuffer, BufferState::DrawIndirectRead}, + {inputsOutputs.inOutDrawStats, + BufferState::MeshShaderReadWrite}, + {inputsOutputs.inDataBuffer, BufferState::MeshShaderRead}, + {inputsOutputs.inArgumentBuffer, BufferState::DrawIndirectRead}, }}, .texelBuffers = StaticArray{{ {lightClusters.indicesCount, BufferState::FragmentShaderRead}, @@ -435,8 +533,35 @@ void ForwardRenderer::record( }}, }); - const Attachments attachments = - createAttachments(inOutTargets, options.transparents); + const 
vk::AttachmentLoadOp loadOp = + options.secondPhase || options.transparents + ? vk::AttachmentLoadOp::eLoad + : vk::AttachmentLoadOp::eClear; + Attachments attachments; + attachments.color.push_back(vk::RenderingAttachmentInfo{ + .imageView = + gRenderResources.images->resource(inputsOutputs.inOutIllumination) + .view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + }); + if (!options.transparents) + attachments.color.push_back(vk::RenderingAttachmentInfo{ + .imageView = + gRenderResources.images->resource(inputsOutputs.inOutVelocity) + .view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + }); + attachments.depth = vk::RenderingAttachmentInfo{ + .imageView = + gRenderResources.images->resource(inputsOutputs.inOutDepth).view, + .imageLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + }; PROFILER_GPU_SCOPE_WITH_STATS(cb, debugName); @@ -468,8 +593,7 @@ void ForwardRenderer::record( descriptorSets[SceneInstancesBindingSet] = scene.sceneInstancesDescriptorSet; descriptorSets[SkyboxBindingSet] = worldDSes.skybox; - descriptorSets[DrawStatsBindingSet] = - m_meshSets[nextFrame * MAX_FRAMES_IN_FLIGHT + pipelineIndex]; + descriptorSets[DrawStatsBindingSet] = ds; const StaticArray dynamicOffsets{{ worldByteOffsets.directionalLight, @@ -502,68 +626,10 @@ void ForwardRenderer::record( sizeof(PCBlock), &pcBlock); const vk::Buffer argumentHandle = - gRenderResources.buffers->nativeHandle(cullerOutput.argumentBuffer); + gRenderResources.buffers->nativeHandle(inputsOutputs.inArgumentBuffer); cb.drawMeshTasksIndirectEXT(argumentHandle, 0, 1, 0); cb.endRendering(); - gRenderResources.buffers->release(cullerOutput.dataBuffer); - gRenderResources.buffers->release(cullerOutput.argumentBuffer); -} - -ForwardRenderer::Attachments 
ForwardRenderer::createAttachments( - const RecordInOut &inOutTargets, bool transparents) -{ - Attachments ret; - if (transparents) - { - ret.color.push_back(vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(inOutTargets.illumination) - .view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - }); - ret.depth = vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(inOutTargets.depth).view, - .imageLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eLoad, - .storeOp = vk::AttachmentStoreOp::eStore, - }; - } - else - { - ret.color = InlineArray{ - vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(inOutTargets.illumination) - .view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, - }, - vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(inOutTargets.velocity) - .view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, - }, - }; - ret.depth = vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(inOutTargets.depth).view, - .imageLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, - }; - } - - return ret; + m_nextFrameRecord++; } diff --git a/src/render/ForwardRenderer.hpp b/src/render/ForwardRenderer.hpp index aa9303e7..ece7ec5f 100644 --- a/src/render/ForwardRenderer.hpp +++ b/src/render/ForwardRenderer.hpp @@ -31,13 +31,17 @@ 
class ForwardRenderer const WorldDSLayouts &world; }; void init( - wheels::ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts); + wheels::ScopedScratch scopeAlloc, const InputDSLayouts &dsLayouts, + MeshletCuller *meshletCuller, + HierarchicalDepthDownsampler *hierarchicalDepthDownsampler); void recompileShaders( wheels::ScopedScratch scopeAlloc, const wheels::HashSet &changedFiles, const InputDSLayouts &dsLayouts); + void startFrame(); + struct OpaqueOutput { ImageHandle illumination; @@ -46,12 +50,10 @@ class ForwardRenderer }; [[nodiscard]] OpaqueOutput recordOpaque( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, - MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const vk::Rect2D &renderArea, - const LightClusteringOutput &lightClusters, - wheels::Optional inHierarchicalDepth, - BufferHandle inOutDrawStats, uint32_t nextFrame, bool applyIbl, - DrawType drawType, DrawStats *drawStats); + const World &world, const Camera &cam, const vk::Rect2D &renderArea, + const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, + uint32_t nextFrame, bool applyIbl, DrawType drawType, + DrawStats *drawStats); struct TransparentInOut { @@ -65,15 +67,22 @@ class ForwardRenderer const LightClusteringOutput &lightClusters, BufferHandle inOutDrawStats, uint32_t nextFrame, DrawType drawType, DrawStats *drawStats); + void releasePreserved(); + private: [[nodiscard]] bool compileShaders( wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts); void createDescriptorSets(wheels::ScopedScratch scopeAlloc); + struct DescriptorSetBuffers + { + BufferHandle dataBuffer; + BufferHandle drawStats; + }; void updateDescriptorSet( - wheels::ScopedScratch scopeAlloc, uint32_t nextFrame, bool transparents, - const MeshletCullerOutput &cullerOutput, BufferHandle inOutDrawStats); + wheels::ScopedScratch scopeAlloc, vk::DescriptorSet ds, + const DescriptorSetBuffers &buffers) const; void destroyGraphicsPipelines(); void 
createGraphicsPipelines(const InputDSLayouts &dsLayouts); @@ -82,33 +91,30 @@ class ForwardRenderer { bool transparents{false}; bool ibl{false}; + bool secondPhase{false}; DrawType drawType{DrawType::Default}; }; struct RecordInOut { - ImageHandle illumination; - ImageHandle velocity; - ImageHandle depth; + ImageHandle inOutIllumination; + ImageHandle inOutVelocity; + ImageHandle inOutDepth; + BufferHandle inOutDrawStats; + BufferHandle inDataBuffer; + BufferHandle inArgumentBuffer; }; - void record( + void recordDraw( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, - MeshletCuller *meshletCuller, const World &world, const Camera &cam, - uint32_t nextFrame, const RecordInOut &inOutTargets, - const LightClusteringOutput &lightClusters, - wheels::Optional inHierarchicalDepth, - BufferHandle inOutDrawStats, const Options &options, + const World &world, const Camera &cam, uint32_t nextFrame, + const RecordInOut &inputsOutputs, + const LightClusteringOutput &lightClusters, const Options &options, DrawStats *drawStats, const char *debugName); - struct Attachments - { - wheels::InlineArray color; - vk::RenderingAttachmentInfo depth; - }; - [[nodiscard]] static Attachments createAttachments( - const RecordInOut &inOutTargets, bool transparents); - bool m_initialized{false}; + MeshletCuller *m_meshletCuller{nullptr}; + HierarchicalDepthDownsampler *m_hierarchicalDepthDownsampler{nullptr}; + wheels::StaticArray m_shaderStages; wheels::Optional m_meshReflection; wheels::Optional m_fragReflection; @@ -117,9 +123,14 @@ class ForwardRenderer wheels::StaticArray m_pipelines; vk::DescriptorSetLayout m_meshSetLayout; - // Separate sets for transparents and opaque - wheels::StaticArray m_meshSets{ + uint32_t m_nextFrameRecord{0}; + // Separate sets for transparents and opaque, and for the two culling phases + // for each + static const uint32_t sDescriptorSetCount = MAX_FRAMES_IN_FLIGHT * 2 * 2; + wheels::StaticArray m_meshSets{ VK_NULL_HANDLE}; + + ImageHandle 
m_previousHierarchicalDepth; }; #endif // PROSPER_RENDER_FORWARD_RENDERER_HPP diff --git a/src/render/Fwd.hpp b/src/render/Fwd.hpp index aad75f43..252ef48e 100644 --- a/src/render/Fwd.hpp +++ b/src/render/Fwd.hpp @@ -40,7 +40,9 @@ class LightClustering; struct LightClusteringOutput; // MeshletCuller.hpp -struct MeshletCullerOutput; +struct MeshletCullerSecondPhaseInputBuffers; +struct MeshletCullerFirstPhaseOutput; +struct MeshletCullerSecondPhaseOutput; class MeshletCuller; // Renderer.hpp diff --git a/src/render/GBufferRenderer.cpp b/src/render/GBufferRenderer.cpp index fdeb45af..eed9bbe4 100644 --- a/src/render/GBufferRenderer.cpp +++ b/src/render/GBufferRenderer.cpp @@ -1,7 +1,5 @@ #include "GBufferRenderer.hpp" -#include - #include "../gfx/DescriptorAllocator.hpp" #include "../gfx/VkUtils.hpp" #include "../scene/Camera.hpp" @@ -18,6 +16,8 @@ #include "MeshletCuller.hpp" #include "RenderResources.hpp" #include "RenderTargets.hpp" +#include "wheels/assert.hpp" +#include using namespace glm; using namespace wheels; @@ -35,7 +35,7 @@ enum BindingSet : uint32_t MaterialTexturesBindingSet, GeometryBuffersBindingSet, SceneInstancesBindingSet, - DrawStatsBindingSet, + MeshShaderBindingSet, BindingSetCount, }; @@ -55,9 +55,12 @@ struct Attachments void GBufferRenderer::init( ScopedScratch scopeAlloc, const vk::DescriptorSetLayout camDSLayout, - const WorldDSLayouts &worldDSLayouts) + const WorldDSLayouts &worldDSLayouts, MeshletCuller *meshletCuller, + HierarchicalDepthDownsampler *hierarchicalDepthDownsampler) { WHEELS_ASSERT(!m_initialized); + WHEELS_ASSERT(meshletCuller != nullptr); + WHEELS_ASSERT(hierarchicalDepthDownsampler != nullptr); LOG_INFO("Creating GBufferRenderer"); @@ -67,6 +70,9 @@ void GBufferRenderer::init( createDescriptorSets(scopeAlloc.child_scope()); createGraphicsPipelines(camDSLayout, worldDSLayouts); + m_meshletCuller = meshletCuller; + m_hierarchicalDepthDownsampler = hierarchicalDepthDownsampler; + m_initialized = true; } @@ -104,14 +110,12 
@@ void GBufferRenderer::recompileShaders( } GBufferRendererOutput GBufferRenderer::record( - ScopedScratch scopeAlloc, vk::CommandBuffer cb, - MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const vk::Rect2D &renderArea, Optional inHierarchicalDepth, + ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, + const Camera &cam, const vk::Rect2D &renderArea, BufferHandle inOutDrawStats, DrawType drawType, const uint32_t nextFrame, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); - WHEELS_ASSERT(meshletCuller != nullptr); WHEELS_ASSERT(drawStats != nullptr); PROFILER_CPU_SCOPE("GBuffer"); @@ -145,146 +149,103 @@ GBufferRendererOutput GBufferRenderer::record( .depth = createDepth(renderArea.extent, "depth"), }; - const MeshletCullerOutput cullerOutput = meshletCuller->record( - scopeAlloc.child_scope(), cb, MeshletCuller::Mode::Opaque, world, - cam, nextFrame, inHierarchicalDepth, "GBuffer", drawStats); - - updateDescriptorSet( - scopeAlloc.child_scope(), nextFrame, cullerOutput, inOutDrawStats); - - transition( - WHEELS_MOV(scopeAlloc), cb, - Transitions{ - .images = StaticArray{{ - {ret.albedoRoughness, ImageState::ColorAttachmentWrite}, - {ret.normalMetalness, ImageState::ColorAttachmentWrite}, - {ret.velocity, ImageState::ColorAttachmentWrite}, - {ret.depth, ImageState::DepthAttachmentReadWrite}, - }}, - .buffers = StaticArray{{ - {inOutDrawStats, BufferState::MeshShaderReadWrite}, - {cullerOutput.dataBuffer, BufferState::MeshShaderRead}, - {cullerOutput.argumentBuffer, - BufferState::DrawIndirectRead}, - }}, - }); - - const Attachments attachments{ - .color = {{ - vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(ret.albedoRoughness) - .view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, - }, - vk::RenderingAttachmentInfo{ - 
.imageView = - gRenderResources.images->resource(ret.normalMetalness) - .view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, + Optional prevHierarchicalDepth; + if (gRenderResources.images->isValidHandle(m_previousHierarchicalDepth)) + prevHierarchicalDepth = m_previousHierarchicalDepth; + + // Conservative two-phase culling from GPU-Driven Rendering Pipelines + // by Sebastian Aaltonen + + // First phase: + // Cull with previous frame hierarchical depth and draw. Store a second + // draw list with potential culling false positives: all meshlets that + // were culled based on depth. + const MeshletCullerFirstPhaseOutput firstPhaseCullingOutput = + m_meshletCuller->recordFirstPhase( + scopeAlloc.child_scope(), cb, MeshletCuller::Mode::Opaque, + world, cam, nextFrame, prevHierarchicalDepth, "GBuffer", + drawStats); + + if (gRenderResources.images->isValidHandle(m_previousHierarchicalDepth)) + gRenderResources.images->release(m_previousHierarchicalDepth); + + { + recordDraw( + scopeAlloc.child_scope(), cb, world, cam, renderArea, nextFrame, + RecordInOut{ + .inDataBuffer = firstPhaseCullingOutput.dataBuffer, + .inArgumentBuffer = firstPhaseCullingOutput.argumentBuffer, + .inOutDrawStats = inOutDrawStats, + .outAlbedoRoughness = ret.albedoRoughness, + .outNormalMetalness = ret.normalMetalness, + .outVelocity = ret.velocity, + .outDepth = ret.depth, }, - vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(ret.velocity).view, - .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, - }, - }}, - .depth = - vk::RenderingAttachmentInfo{ - .imageView = - gRenderResources.images->resource(ret.depth).view, - .imageLayout = - 
vk::ImageLayout::eDepthStencilAttachmentOptimal, - .loadOp = vk::AttachmentLoadOp::eClear, - .storeOp = vk::AttachmentStoreOp::eStore, - .clearValue = - vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, + drawType, false, drawStats); + + gRenderResources.buffers->release( + firstPhaseCullingOutput.dataBuffer); + gRenderResources.buffers->release( + firstPhaseCullingOutput.argumentBuffer); + } + + if (firstPhaseCullingOutput.secondPhaseInput.has_value()) + { + // Second phase: + // Another pass over the meshlets that got culled by depth in the + // first pass, now with hierarchical depth built from the first pass + // result. This way we'll now draw any meshlets that got disoccluded + // in the current frame. + const ImageHandle currentHierarchicalDepth = + m_hierarchicalDepthDownsampler->record( + scopeAlloc.child_scope(), cb, ret.depth, nextFrame, + "GBufferFirstPhase"); + + const MeshletCullerSecondPhaseOutput secondPhaseCullingOutput = + m_meshletCuller->recordSecondPhase( + scopeAlloc.child_scope(), cb, world, cam, nextFrame, + *firstPhaseCullingOutput.secondPhaseInput, + currentHierarchicalDepth, "GBuffer"); + + gRenderResources.images->release(currentHierarchicalDepth); + gRenderResources.buffers->release( + *firstPhaseCullingOutput.secondPhaseInput); + + recordDraw( + scopeAlloc.child_scope(), cb, world, cam, renderArea, nextFrame, + RecordInOut{ + .inDataBuffer = secondPhaseCullingOutput.dataBuffer, + .inArgumentBuffer = secondPhaseCullingOutput.argumentBuffer, + .inOutDrawStats = inOutDrawStats, + .outAlbedoRoughness = ret.albedoRoughness, + .outNormalMetalness = ret.normalMetalness, + .outVelocity = ret.velocity, + .outDepth = ret.depth, }, - }; - - PROFILER_GPU_SCOPE_WITH_STATS(cb, "GBuffer"); - - cb.beginRendering(vk::RenderingInfo{ - .renderArea = renderArea, - .layerCount = 1, - .colorAttachmentCount = - asserted_cast(attachments.color.size()), - .pColorAttachments = attachments.color.data(), - .pDepthAttachment = &attachments.depth, - }); - - 
cb.bindPipeline(vk::PipelineBindPoint::eGraphics, m_pipeline); - - const Scene &scene = world.currentScene(); - const WorldDescriptorSets &worldDSes = world.descriptorSets(); - const WorldByteOffsets &worldByteOffsets = world.byteOffsets(); - - StaticArray descriptorSets{ - VK_NULL_HANDLE}; - descriptorSets[CameraBindingSet] = cam.descriptorSet(); - descriptorSets[MaterialDatasBindingSet] = - worldDSes.materialDatas[nextFrame]; - descriptorSets[MaterialTexturesBindingSet] = worldDSes.materialTextures; - descriptorSets[GeometryBuffersBindingSet] = - worldDSes.geometry[nextFrame]; - descriptorSets[SceneInstancesBindingSet] = - scene.sceneInstancesDescriptorSet; - descriptorSets[DrawStatsBindingSet] = m_meshSets[nextFrame]; - - const StaticArray dynamicOffsets{{ - cam.bufferOffset(), - worldByteOffsets.globalMaterialConstants, - worldByteOffsets.modelInstanceTransforms, - worldByteOffsets.previousModelInstanceTransforms, - worldByteOffsets.modelInstanceScales, - }}; - - cb.bindDescriptorSets( - vk::PipelineBindPoint::eGraphics, m_pipelineLayout, - 0, // firstSet - asserted_cast(descriptorSets.size()), - descriptorSets.data(), - asserted_cast(dynamicOffsets.size()), - dynamicOffsets.data()); - - setViewportScissor(cb, renderArea); - - const PCBlock pcBlock{ - .previousTransformValid = scene.previousTransformsValid ? 
1u : 0u, - .drawType = static_cast(drawType), - }; - cb.pushConstants( - m_pipelineLayout, - vk::ShaderStageFlagBits::eMeshEXT | - vk::ShaderStageFlagBits::eFragment, - 0, // offset - sizeof(PCBlock), &pcBlock); - - const vk::Buffer argumentHandle = - gRenderResources.buffers->nativeHandle(cullerOutput.argumentBuffer); - cb.drawMeshTasksIndirectEXT(argumentHandle, 0, 1, 0); - - cb.endRendering(); - - gRenderResources.buffers->release(cullerOutput.dataBuffer); - gRenderResources.buffers->release(cullerOutput.argumentBuffer); + drawType, true, drawStats); + + gRenderResources.buffers->release( + secondPhaseCullingOutput.dataBuffer); + gRenderResources.buffers->release( + secondPhaseCullingOutput.argumentBuffer); + } + + // Potential previous pyramid was already freed during first phase + m_previousHierarchicalDepth = m_hierarchicalDepthDownsampler->record( + scopeAlloc.child_scope(), cb, ret.depth, nextFrame, + "GBufferSecondPhase"); + gRenderResources.images->preserve(m_previousHierarchicalDepth); } return ret; } +void GBufferRenderer::releasePreserved() +{ + if (gRenderResources.images->isValidHandle(m_previousHierarchicalDepth)) + gRenderResources.images->release(m_previousHierarchicalDepth); +} + bool GBufferRenderer::compileShaders( ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts) { @@ -297,7 +258,7 @@ bool GBufferRenderer::compileShaders( appendDefineStr(meshDefines, "GEOMETRY_SET", GeometryBuffersBindingSet); appendDefineStr( meshDefines, "SCENE_INSTANCES_SET", SceneInstancesBindingSet); - appendDefineStr(meshDefines, "MESH_SHADER_SET", DrawStatsBindingSet); + appendDefineStr(meshDefines, "MESH_SHADER_SET", MeshShaderBindingSet); appendDefineStr(meshDefines, "USE_GBUFFER_PC"); appendDefineStr(meshDefines, "MAX_MS_VERTS", sMaxMsVertices); appendDefineStr(meshDefines, "MAX_MS_PRIMS", sMaxMsTriangles); @@ -380,34 +341,29 @@ void GBufferRenderer::createDescriptorSets(ScopedScratch scopeAlloc) { WHEELS_ASSERT(m_meshReflection.has_value()); 
m_meshSetLayout = m_meshReflection->createDescriptorSetLayout( - WHEELS_MOV(scopeAlloc), DrawStatsBindingSet, + WHEELS_MOV(scopeAlloc), MeshShaderBindingSet, vk::ShaderStageFlagBits::eMeshEXT); - const StaticArray layouts{ + const StaticArray layouts{ m_meshSetLayout}; - const StaticArray debugNames{ + const StaticArray debugNames{ "GBufferMesh"}; gStaticDescriptorsAlloc.allocate( layouts, debugNames, m_meshSets.mut_span()); } void GBufferRenderer::updateDescriptorSet( - ScopedScratch scopeAlloc, uint32_t nextFrame, - const MeshletCullerOutput &cullerOutput, BufferHandle inOutDrawStats) + ScopedScratch scopeAlloc, vk::DescriptorSet ds, + const DescriptorSetBuffers &buffers) const { - // TODO: - // Don't update if resources are the same as before (for this DS index)? - // Have to compare against both extent and previous native handle? - const vk::DescriptorSet ds = m_meshSets[nextFrame]; - const StaticArray infos{{ DescriptorInfo{vk::DescriptorBufferInfo{ - .buffer = gRenderResources.buffers->nativeHandle(inOutDrawStats), + .buffer = gRenderResources.buffers->nativeHandle(buffers.drawStats), .range = VK_WHOLE_SIZE, }}, DescriptorInfo{vk::DescriptorBufferInfo{ .buffer = - gRenderResources.buffers->nativeHandle(cullerOutput.dataBuffer), + gRenderResources.buffers->nativeHandle(buffers.dataBuffer), .range = VK_WHOLE_SIZE, }}, }}; @@ -415,7 +371,7 @@ void GBufferRenderer::updateDescriptorSet( WHEELS_ASSERT(m_meshReflection.has_value()); const wheels::Array descriptorWrites = m_meshReflection->generateDescriptorWrites( - scopeAlloc, DrawStatsBindingSet, ds, infos); + scopeAlloc, MeshShaderBindingSet, ds, infos); gDevice.logical().updateDescriptorSets( asserted_cast(descriptorWrites.size()), @@ -439,7 +395,7 @@ void GBufferRenderer::createGraphicsPipelines( setLayouts[MaterialTexturesBindingSet] = worldDSLayouts.materialTextures; setLayouts[GeometryBuffersBindingSet] = worldDSLayouts.geometry; setLayouts[SceneInstancesBindingSet] = worldDSLayouts.sceneInstances; - 
setLayouts[DrawStatsBindingSet] = m_meshSetLayout; + setLayouts[MeshShaderBindingSet] = m_meshSetLayout; const vk::PushConstantRange pcRange{ .stageFlags = vk::ShaderStageFlagBits::eMeshEXT | @@ -480,3 +436,151 @@ void GBufferRenderer::createGraphicsPipelines( .debugName = "GBufferRenderer", }); } + +void GBufferRenderer::recordDraw( + ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, + const Camera &cam, const vk::Rect2D &renderArea, uint32_t nextFrame, + const RecordInOut &inputsOutputs, DrawType drawType, bool isSecondPhase, + DrawStats *drawStats) +{ + WHEELS_ASSERT(drawStats != nullptr); + + const vk::DescriptorSet ds = + m_meshSets[nextFrame * 2 + (isSecondPhase ? 1u : 0u)]; + + const char *debugName = + isSecondPhase ? "GBufferSecondPass" : "GBufferFirstPass"; + + updateDescriptorSet( + scopeAlloc.child_scope(), ds, + DescriptorSetBuffers{ + .dataBuffer = inputsOutputs.inDataBuffer, + .drawStats = inputsOutputs.inOutDrawStats, + }); + + const ImageState colorAttachmentState = + isSecondPhase ? ImageState::ColorAttachmentReadWrite + : ImageState::ColorAttachmentWrite; + + transition( + WHEELS_MOV(scopeAlloc), cb, + Transitions{ + .images = StaticArray{{ + {inputsOutputs.outAlbedoRoughness, colorAttachmentState}, + {inputsOutputs.outNormalMetalness, colorAttachmentState}, + {inputsOutputs.outVelocity, colorAttachmentState}, + {inputsOutputs.outDepth, ImageState::DepthAttachmentReadWrite}, + }}, + .buffers = StaticArray{{ + {inputsOutputs.inOutDrawStats, + BufferState::MeshShaderReadWrite}, + {inputsOutputs.inDataBuffer, BufferState::MeshShaderRead}, + {inputsOutputs.inArgumentBuffer, BufferState::DrawIndirectRead}, + }}, + }); + + const vk::AttachmentLoadOp loadOp = isSecondPhase + ? 
vk::AttachmentLoadOp::eLoad + : vk::AttachmentLoadOp::eClear; + const Attachments attachments{ + .color = {{ + vk::RenderingAttachmentInfo{ + .imageView = gRenderResources.images + ->resource(inputsOutputs.outAlbedoRoughness) + .view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, + }, + vk::RenderingAttachmentInfo{ + .imageView = gRenderResources.images + ->resource(inputsOutputs.outNormalMetalness) + .view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, + }, + vk::RenderingAttachmentInfo{ + .imageView = + gRenderResources.images->resource(inputsOutputs.outVelocity) + .view, + .imageLayout = vk::ImageLayout::eColorAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, + }, + }}, + .depth = + vk::RenderingAttachmentInfo{ + .imageView = + gRenderResources.images->resource(inputsOutputs.outDepth) + .view, + .imageLayout = vk::ImageLayout::eDepthStencilAttachmentOptimal, + .loadOp = loadOp, + .storeOp = vk::AttachmentStoreOp::eStore, + .clearValue = vk::ClearValue{std::array{0.f, 0.f, 0.f, 0.f}}, + }, + }; + + PROFILER_GPU_SCOPE_WITH_STATS(cb, debugName); + + cb.beginRendering(vk::RenderingInfo{ + .renderArea = renderArea, + .layerCount = 1, + .colorAttachmentCount = + asserted_cast(attachments.color.size()), + .pColorAttachments = attachments.color.data(), + .pDepthAttachment = &attachments.depth, + }); + + cb.bindPipeline(vk::PipelineBindPoint::eGraphics, m_pipeline); + + const Scene &scene = world.currentScene(); + const WorldDescriptorSets &worldDSes = world.descriptorSets(); + const WorldByteOffsets &worldByteOffsets = world.byteOffsets(); + + StaticArray descriptorSets{ + VK_NULL_HANDLE}; + 
descriptorSets[CameraBindingSet] = cam.descriptorSet(); + descriptorSets[MaterialDatasBindingSet] = + worldDSes.materialDatas[nextFrame]; + descriptorSets[MaterialTexturesBindingSet] = worldDSes.materialTextures; + descriptorSets[GeometryBuffersBindingSet] = worldDSes.geometry[nextFrame]; + descriptorSets[SceneInstancesBindingSet] = + scene.sceneInstancesDescriptorSet; + descriptorSets[MeshShaderBindingSet] = ds; + + const StaticArray dynamicOffsets{{ + cam.bufferOffset(), + worldByteOffsets.globalMaterialConstants, + worldByteOffsets.modelInstanceTransforms, + worldByteOffsets.previousModelInstanceTransforms, + worldByteOffsets.modelInstanceScales, + }}; + + cb.bindDescriptorSets( + vk::PipelineBindPoint::eGraphics, m_pipelineLayout, + 0, // firstSet + asserted_cast(descriptorSets.size()), descriptorSets.data(), + asserted_cast(dynamicOffsets.size()), dynamicOffsets.data()); + + setViewportScissor(cb, renderArea); + + const PCBlock pcBlock{ + .previousTransformValid = scene.previousTransformsValid ? 
1u : 0u, + .drawType = static_cast(drawType), + }; + cb.pushConstants( + m_pipelineLayout, + vk::ShaderStageFlagBits::eMeshEXT | vk::ShaderStageFlagBits::eFragment, + 0, // offset + sizeof(PCBlock), &pcBlock); + + const vk::Buffer argumentHandle = + gRenderResources.buffers->nativeHandle(inputsOutputs.inArgumentBuffer); + cb.drawMeshTasksIndirectEXT(argumentHandle, 0, 1, 0); + + cb.endRendering(); +} diff --git a/src/render/GBufferRenderer.hpp b/src/render/GBufferRenderer.hpp index 6955e4d4..4d168484 100644 --- a/src/render/GBufferRenderer.hpp +++ b/src/render/GBufferRenderer.hpp @@ -8,6 +8,7 @@ #include "../utils/Fwd.hpp" #include "Fwd.hpp" #include "RenderResourceHandle.hpp" +#include "render/HierarchicalDepthDownsampler.hpp" #include #include @@ -33,7 +34,8 @@ class GBufferRenderer void init( wheels::ScopedScratch scopeAlloc, vk::DescriptorSetLayout camDSLayout, - const WorldDSLayouts &worldDSLayouts); + const WorldDSLayouts &worldDSLayouts, MeshletCuller *meshletCuller, + HierarchicalDepthDownsampler *hierarchicalDepthDownsampler); void recompileShaders( wheels::ScopedScratch scopeAlloc, @@ -43,20 +45,24 @@ class GBufferRenderer [[nodiscard]] GBufferRendererOutput record( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, - MeshletCuller *meshletCuller, const World &world, const Camera &cam, - const vk::Rect2D &renderArea, - wheels::Optional inHierarchicalDepth, + const World &world, const Camera &cam, const vk::Rect2D &renderArea, BufferHandle inOutDrawStats, DrawType drawType, uint32_t nextFrame, DrawStats *drawStats); + void releasePreserved(); private: [[nodiscard]] bool compileShaders( wheels::ScopedScratch scopeAlloc, const WorldDSLayouts &worldDSLayouts); void createDescriptorSets(wheels::ScopedScratch scopeAlloc); + struct DescriptorSetBuffers + { + BufferHandle dataBuffer; + BufferHandle drawStats; + }; void updateDescriptorSet( - wheels::ScopedScratch scopeAlloc, uint32_t nextFrame, - const MeshletCullerOutput &cullerOutput, BufferHandle 
inOutDrawStats); + wheels::ScopedScratch scopeAlloc, vk::DescriptorSet ds, + const DescriptorSetBuffers &buffers) const; void destroyGraphicsPipeline(); @@ -64,8 +70,27 @@ class GBufferRenderer vk::DescriptorSetLayout camDSLayout, const WorldDSLayouts &worldDSLayouts); + struct RecordInOut + { + BufferHandle inDataBuffer; + BufferHandle inArgumentBuffer; + BufferHandle inOutDrawStats; + ImageHandle outAlbedoRoughness; + ImageHandle outNormalMetalness; + ImageHandle outVelocity; + ImageHandle outDepth; + }; + void recordDraw( + wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, + const World &world, const Camera &cam, const vk::Rect2D &renderArea, + uint32_t nextFrame, const RecordInOut &inputsOutputs, DrawType drawType, + bool isSecondPhase, DrawStats *drawStats); + bool m_initialized{false}; + MeshletCuller *m_meshletCuller{nullptr}; + HierarchicalDepthDownsampler *m_hierarchicalDepthDownsampler{nullptr}; + wheels::StaticArray m_shaderStages; wheels::Optional m_meshReflection; wheels::Optional m_fragReflection; @@ -74,8 +99,12 @@ class GBufferRenderer vk::Pipeline m_pipeline; vk::DescriptorSetLayout m_meshSetLayout; - wheels::StaticArray m_meshSets{ + // Two sets per frame for the two pass culled draw + static const uint32_t sDescriptorSetCount = MAX_FRAMES_IN_FLIGHT * 2; + wheels::StaticArray m_meshSets{ VK_NULL_HANDLE}; + + ImageHandle m_previousHierarchicalDepth; }; #endif // PROSPER_RENDER_GBUFFER_RENDERER_HPP diff --git a/src/render/HierarchicalDepthDownsampler.cpp b/src/render/HierarchicalDepthDownsampler.cpp index 6cb24a03..b449a787 100644 --- a/src/render/HierarchicalDepthDownsampler.cpp +++ b/src/render/HierarchicalDepthDownsampler.cpp @@ -4,6 +4,7 @@ #include "../utils/Profiler.hpp" #include "../utils/Utils.hpp" #include "RenderResources.hpp" +#include "render/ComputePass.hpp" using namespace glm; using namespace wheels; @@ -61,7 +62,12 @@ void HierarchicalDepthDownsampler::init(ScopedScratch scopeAlloc) { WHEELS_ASSERT(!m_initialized); - 
m_computePass.init(WHEELS_MOV(scopeAlloc), shaderDefinitionCallback); + m_computePass.init( + WHEELS_MOV(scopeAlloc), shaderDefinitionCallback, + ComputePassOptions{ + // GBuffer HiZ before and after second culling pass + .perFrameRecordLimit = 2, + }); // Don't use a shared resource as this is tiny and the clear can be skipped // after the first frame if we know nothing else uses it. m_atomicCounter = gDevice.createBuffer(BufferCreateInfo{ @@ -86,15 +92,19 @@ void HierarchicalDepthDownsampler::recompileShaders( m_computePass.recompileShader( WHEELS_MOV(scopeAlloc), changedFiles, shaderDefinitionCallback); } +void HierarchicalDepthDownsampler::startFrame() { m_computePass.startFrame(); } ImageHandle HierarchicalDepthDownsampler::record( ScopedScratch scopeAlloc, vk::CommandBuffer cb, - ImageHandle inNonLinearDepth, const uint32_t nextFrame) + ImageHandle inNonLinearDepth, const uint32_t nextFrame, StrSpan debugPrefix) { WHEELS_ASSERT(m_initialized); - const char *const passName = "HiZDownsampler"; - PROFILER_CPU_SCOPE(passName); + String passName{scopeAlloc}; + passName.extend(debugPrefix); + passName.extend("HiZDownsampler"); + + PROFILER_CPU_SCOPE(passName.c_str()); const Image &inDepth = gRenderResources.images->resource(inNonLinearDepth); WHEELS_ASSERT( @@ -127,6 +137,10 @@ ImageHandle HierarchicalDepthDownsampler::record( SpdSetup( dispatchThreadGroupCountXY, pcBlock.numWorkGroupsPerSlice, rectInfo); + String outName{scopeAlloc}; + outName.extend(debugPrefix); + outName.extend("HierarchicalDepth"); + const ImageHandle outHierarchicalDepth = gRenderResources.images->create( ImageDescription{ .format = sHierarchicalDepthFormat, @@ -136,7 +150,7 @@ ImageHandle HierarchicalDepthDownsampler::record( .usageFlags = vk::ImageUsageFlagBits::eSampled | vk::ImageUsageFlagBits::eStorage, }, - "HierarchicalDepth"); + outName.c_str()); const Span mipViews = gRenderResources.images->subresourceViews(outHierarchicalDepth); @@ -196,7 +210,7 @@ ImageHandle 
HierarchicalDepthDownsampler::record( m_counterNotCleared = false; } - PROFILER_GPU_SCOPE(cb, passName); + PROFILER_GPU_SCOPE(cb, passName.c_str()); const vk::DescriptorSet descriptorSet = m_computePass.storageSet(nextFrame); diff --git a/src/render/HierarchicalDepthDownsampler.hpp b/src/render/HierarchicalDepthDownsampler.hpp index b0f306a9..caffca6d 100644 --- a/src/render/HierarchicalDepthDownsampler.hpp +++ b/src/render/HierarchicalDepthDownsampler.hpp @@ -3,6 +3,7 @@ #include #include +#include #include "../gfx/Fwd.hpp" #include "../gfx/Resources.hpp" @@ -30,10 +31,13 @@ class HierarchicalDepthDownsampler wheels::ScopedScratch scopeAlloc, const wheels::HashSet &changedFiles); + void startFrame(); + // Downsamples a depth pyramid, keeping it non-linear to match the input. [[nodiscard]] ImageHandle record( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, - ImageHandle inNonLinearDepth, uint32_t nextFrame); + ImageHandle inNonLinearDepth, uint32_t nextFrame, + wheels::StrSpan debugPrefix); private: bool m_initialized{false}; diff --git a/src/render/MeshletCuller.cpp b/src/render/MeshletCuller.cpp index dd348d6e..78f1934d 100644 --- a/src/render/MeshletCuller.cpp +++ b/src/render/MeshletCuller.cpp @@ -11,6 +11,7 @@ #include "../utils/Profiler.hpp" #include "DrawStats.hpp" #include "RenderResources.hpp" +#include "render/RenderResourceHandle.hpp" using namespace glm; using namespace wheels; @@ -40,13 +41,14 @@ enum GeneratorBindingSet : uint32_t struct GeneratorPCBlock { - uint matchTransparents; + uint32_t matchTransparents; }; struct CullerPCBlock { // 0 means no hiz bound uint hizMipCount{0}; + uint32_t outputSecondPhaseInput{0}; }; enum CullerBindingSet : uint32_t @@ -164,13 +166,15 @@ void MeshletCuller::init( m_cullerArgumentsWriter.init( scopeAlloc.child_scope(), argumentsWriterDefinitionCallback, ComputePassOptions{ - .perFrameRecordLimit = sMaxRecordsPerFrame, + // Twice the records for two-phase culling + .perFrameRecordLimit = 
sMaxRecordsPerFrame * 2, }); m_drawListCuller.init( WHEELS_MOV(scopeAlloc), cullerDefinitionCallback, ComputePassOptions{ .storageSetIndex = CullerStorageBindingSet, - .perFrameRecordLimit = sMaxRecordsPerFrame, + // Twice the records of for two-phase culling + .perFrameRecordLimit = sMaxRecordsPerFrame * 2, .externalDsLayouts = cullerExternalDsLayouts(worldDsLayouts, camDsLayout), }); @@ -207,17 +211,17 @@ void MeshletCuller::startFrame() m_drawListCuller.startFrame(); } -MeshletCullerOutput MeshletCuller::record( +MeshletCullerFirstPhaseOutput MeshletCuller::recordFirstPhase( ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, const Camera &cam, uint32_t nextFrame, - Optional inHierarchicalDepth, const char *debugPrefix, + const Optional &inHierarchicalDepth, StrSpan debugPrefix, DrawStats *drawStats) { WHEELS_ASSERT(m_initialized); String scopeName{scopeAlloc}; scopeName.extend(debugPrefix); - scopeName.extend("DrawList"); + scopeName.extend("DrawListFirstPhase"); PROFILER_CPU_GPU_SCOPE(cb, scopeName.c_str()); @@ -228,24 +232,70 @@ MeshletCullerOutput MeshletCuller::record( const BufferHandle cullerArgs = recordWriteCullerArgs( scopeAlloc.child_scope(), cb, nextFrame, initialList, debugPrefix); - const MeshletCullerOutput culledList = recordCullList( + const bool outputSecondPhaseInput = inHierarchicalDepth.has_value(); + const CullOutput culledList = recordCullList( WHEELS_MOV(scopeAlloc), cb, world, cam, nextFrame, - CullerInput{ + CullInput{ .dataBuffer = initialList, .argumentBuffer = cullerArgs, .hierarchicalDepth = inHierarchicalDepth, }, - debugPrefix); + outputSecondPhaseInput, debugPrefix); gRenderResources.buffers->release(initialList); gRenderResources.buffers->release(cullerArgs); - return culledList; + MeshletCullerFirstPhaseOutput ret{ + .dataBuffer = culledList.dataBuffer, + .argumentBuffer = culledList.argumentBuffer, + .secondPhaseInput = culledList.secondPhaseInput, + }; + + return ret; +} + 
+MeshletCullerSecondPhaseOutput MeshletCuller::recordSecondPhase( + ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, + const Camera &cam, uint32_t nextFrame, BufferHandle inputBuffer, + ImageHandle inHierarchicalDepth, StrSpan debugPrefix) +{ + WHEELS_ASSERT(m_initialized); + + String scopeName{scopeAlloc}; + scopeName.extend(debugPrefix); + scopeName.extend("DrawListSecondPhase"); + + PROFILER_CPU_GPU_SCOPE(cb, scopeName.c_str()); + + String argsPrefix{scopeAlloc}; + argsPrefix.extend(debugPrefix); + argsPrefix.extend("SecondPhase"); + + const BufferHandle argumentBuffer = recordWriteCullerArgs( + scopeAlloc.child_scope(), cb, nextFrame, inputBuffer, argsPrefix); + + const CullOutput culledList = recordCullList( + WHEELS_MOV(scopeAlloc), cb, world, cam, nextFrame, + CullInput{ + .dataBuffer = inputBuffer, + .argumentBuffer = argumentBuffer, + .hierarchicalDepth = inHierarchicalDepth, + }, + false, debugPrefix); + + gRenderResources.buffers->release(argumentBuffer); + + MeshletCullerSecondPhaseOutput ret{ + .dataBuffer = culledList.dataBuffer, + .argumentBuffer = culledList.argumentBuffer, + }; + + return ret; } BufferHandle MeshletCuller::recordGenerateList( ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, - const World &world, uint32_t nextFrame, const char *debugPrefix, + const World &world, uint32_t nextFrame, StrSpan debugPrefix, DrawStats *drawStats) { uint32_t meshletCountUpperBound = 0; @@ -365,7 +415,7 @@ BufferHandle MeshletCuller::recordGenerateList( BufferHandle MeshletCuller::recordWriteCullerArgs( ScopedScratch scopeAlloc, vk::CommandBuffer cb, uint32_t nextFrame, - BufferHandle drawList, const char *debugPrefix) + BufferHandle drawList, StrSpan debugPrefix) { String argumentsName{scopeAlloc}; argumentsName.extend(debugPrefix); @@ -410,21 +460,34 @@ BufferHandle MeshletCuller::recordWriteCullerArgs( return ret; } -MeshletCullerOutput MeshletCuller::recordCullList( +MeshletCuller::CullOutput MeshletCuller::recordCullList( 
ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, - const Camera &cam, uint32_t nextFrame, const CullerInput &input, - const char *debugPrefix) + const Camera &cam, uint32_t nextFrame, const CullInput &input, + bool outputSecondPhaseInput, StrSpan debugPrefix) { String dataName{scopeAlloc}; dataName.extend(debugPrefix); + if (outputSecondPhaseInput) + dataName.extend("FirstPhase"); + // Second phase outputs might be skipped for first phase too so let's not + // confuse debug naming by adding 'SecondPhase' in that case. dataName.extend("CulledMeshletDrawList"); + String secondPhaseDataName{scopeAlloc}; + secondPhaseDataName.extend(debugPrefix); + secondPhaseDataName.extend("SecondPhaseInputDrawList"); + String argumentsName{scopeAlloc}; argumentsName.extend(debugPrefix); + if (outputSecondPhaseInput) + argumentsName.extend("FirstPhase"); + // Second phase outputs might be skipped for first phase too so let's not + // confuse debug naming by adding 'SecondPhase' in that case. 
argumentsName.extend("MeshDiscpatchArguments"); + const bool hierarchicalDepthGiven = input.hierarchicalDepth.has_value(); ImageHandle dummyHierarchicalDepth; - if (!input.hierarchicalDepth.has_value()) + if (!hierarchicalDepthGiven) { String dummyHizName{scopeAlloc}; dummyHizName.extend(debugPrefix); @@ -471,11 +534,12 @@ MeshletCullerOutput MeshletCuller::recordCullList( const vk::DeviceSize drawListByteSize = gRenderResources.buffers->resource(input.dataBuffer).byteSize; - const MeshletCullerOutput ret{ + CullOutput ret{ .dataBuffer = gRenderResources.buffers->create( BufferDescription{ .byteSize = drawListByteSize, - .usage = vk::BufferUsageFlagBits::eStorageBuffer, + .usage = vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageBuffer, .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, }, dataName.c_str()), @@ -488,8 +552,25 @@ MeshletCullerOutput MeshletCuller::recordCullList( .properties = vk::MemoryPropertyFlagBits::eDeviceLocal, }, argumentsName.c_str()), + .secondPhaseInput = + outputSecondPhaseInput + ? gRenderResources.buffers->create( + BufferDescription{ + .byteSize = drawListByteSize, + .usage = vk::BufferUsageFlagBits::eTransferDst | + vk::BufferUsageFlagBits::eStorageBuffer, + .properties = + vk::MemoryPropertyFlagBits::eDeviceLocal, + }, + secondPhaseDataName.c_str()) + : Optional{}, }; + // Bind the first buffer pair twice when we don't have hierarchical depth. + // These binds won't be accessed in the shader + const BufferHandle secondPhaseDataBindBuffer = + outputSecondPhaseInput ? 
*ret.secondPhaseInput : ret.dataBuffer; + m_drawListCuller.updateDescriptorSet( scopeAlloc.child_scope(), nextFrame, StaticArray{{ @@ -508,33 +589,74 @@ MeshletCullerOutput MeshletCuller::recordCullList( gRenderResources.buffers->nativeHandle(ret.argumentBuffer), .range = VK_WHOLE_SIZE, }}, + DescriptorInfo{vk::DescriptorBufferInfo{ + .buffer = gRenderResources.buffers->nativeHandle( + secondPhaseDataBindBuffer), + .range = VK_WHOLE_SIZE, + }}, DescriptorInfo{hierarchicalDepthInfos}, DescriptorInfo{vk::DescriptorImageInfo{ .sampler = gRenderResources.nearestBorderBlackFloatSampler, }}, }}); - gRenderResources.buffers->transition( - cb, ret.argumentBuffer, BufferState::TransferDst); + { + InlineArray bufferTransitions; + bufferTransitions.emplace_back( + ret.dataBuffer, BufferState::TransferDst); + bufferTransitions.emplace_back( + ret.argumentBuffer, BufferState::TransferDst); + if (outputSecondPhaseInput) + bufferTransitions.emplace_back( + *ret.secondPhaseInput, BufferState::TransferDst); + + transition( + WHEELS_MOV(scopeAlloc), cb, + Transitions{ + .images = StaticArray{{ + {hierarchicalDepth, ImageState::ComputeShaderSampledRead}, + }}, + .buffers = bufferTransitions, + }); + } // Clear args first as X will be used for atomic adds cb.fillBuffer( gRenderResources.buffers->nativeHandle(ret.argumentBuffer), 0, sArgumentsByteSize, 0u); + // Count is also mirrored in data buffer + cb.fillBuffer( + gRenderResources.buffers->nativeHandle(ret.dataBuffer), 0, + sizeof(uint32_t), 0u); + if (outputSecondPhaseInput) + // Same goes for count in second phase input + cb.fillBuffer( + gRenderResources.buffers->nativeHandle(*ret.secondPhaseInput), 0, + sizeof(uint32_t), 0u); - transition( - WHEELS_MOV(scopeAlloc), cb, - Transitions{ - .images = StaticArray{{ - {hierarchicalDepth, ImageState::ComputeShaderSampledRead}, - }}, - .buffers = StaticArray{{ - {input.dataBuffer, BufferState::ComputeShaderRead}, - {input.argumentBuffer, BufferState::DrawIndirectRead}, - 
{ret.dataBuffer, BufferState::ComputeShaderWrite}, - {ret.argumentBuffer, BufferState::ComputeShaderReadWrite}, - }}, - }); + { + InlineArray bufferTransitions; + bufferTransitions.emplace_back( + input.dataBuffer, BufferState::ComputeShaderRead); + bufferTransitions.emplace_back( + input.argumentBuffer, BufferState::DrawIndirectRead); + bufferTransitions.emplace_back( + ret.dataBuffer, BufferState::ComputeShaderReadWrite); + bufferTransitions.emplace_back( + ret.argumentBuffer, BufferState::ComputeShaderReadWrite); + if (outputSecondPhaseInput) + bufferTransitions.emplace_back( + *ret.secondPhaseInput, BufferState::ComputeShaderReadWrite); + + transition( + WHEELS_MOV(scopeAlloc), cb, + Transitions{ + .images = StaticArray{{ + {hierarchicalDepth, ImageState::ComputeShaderSampledRead}, + }}, + .buffers = bufferTransitions, + }); + } const Scene &scene = world.currentScene(); const WorldDescriptorSets &worldDSes = world.descriptorSets(); @@ -562,6 +684,7 @@ MeshletCullerOutput MeshletCuller::recordCullList( ? gRenderResources.images->resource(*input.hierarchicalDepth) .mipCount : 0, + .outputSecondPhaseInput = outputSecondPhaseInput ? 1u : 0u, }; const vk::Buffer argumentsHandle = diff --git a/src/render/MeshletCuller.hpp b/src/render/MeshletCuller.hpp index 0bad6362..2e5192e4 100644 --- a/src/render/MeshletCuller.hpp +++ b/src/render/MeshletCuller.hpp @@ -13,9 +13,17 @@ #include #include #include +#include #include -struct MeshletCullerOutput +struct MeshletCullerFirstPhaseOutput +{ + BufferHandle dataBuffer; + BufferHandle argumentBuffer; + wheels::Optional secondPhaseInput; +}; + +struct MeshletCullerSecondPhaseOutput { BufferHandle dataBuffer; BufferHandle argumentBuffer; @@ -49,32 +57,49 @@ class MeshletCuller Opaque, Transparent, }; - [[nodiscard]] MeshletCullerOutput record( + // Creates and culls meshlet draw lists from full scene data. Also creates + // input for second phase if inHierarchicalDepth is given. 
+ [[nodiscard]] MeshletCullerFirstPhaseOutput recordFirstPhase( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, const World &world, const Camera &cam, uint32_t nextFrame, - wheels::Optional inHierarchicalDepth, - const char *debugPrefix, DrawStats *drawStats); + const wheels::Optional &inHierarchicalDepth, + wheels::StrSpan debugPrefix, DrawStats *drawStats); + + // Culls the input draw lists. Intended to use with depth drawn with the + // first pass outputs + [[nodiscard]] MeshletCullerSecondPhaseOutput recordSecondPhase( + wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, + const World &world, const Camera &cam, uint32_t nextFrame, + BufferHandle inputBuffer, ImageHandle inHierarchicalDepth, + wheels::StrSpan debugPrefix); private: [[nodiscard]] BufferHandle recordGenerateList( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, Mode mode, - const World &world, uint32_t nextFrame, const char *debugPrefix, + const World &world, uint32_t nextFrame, wheels::StrSpan debugPrefix, DrawStats *drawStats); [[nodiscard]] BufferHandle recordWriteCullerArgs( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, - uint32_t nextFrame, BufferHandle drawList, const char *debugPrefix); + uint32_t nextFrame, BufferHandle drawList, wheels::StrSpan debugPrefix); - struct CullerInput + struct CullInput { BufferHandle dataBuffer; BufferHandle argumentBuffer; wheels::Optional hierarchicalDepth; }; - [[nodiscard]] MeshletCullerOutput recordCullList( + struct CullOutput + { + BufferHandle dataBuffer; + BufferHandle argumentBuffer; + wheels::Optional secondPhaseInput; + }; + [[nodiscard]] CullOutput recordCullList( wheels::ScopedScratch scopeAlloc, vk::CommandBuffer cb, const World &world, const Camera &cam, uint32_t nextFrame, - const CullerInput &input, const char *debugPrefix); + const CullInput &input, bool outputSecondPhaseInputs, + wheels::StrSpan debugPrefix); bool m_initialized{false}; diff --git a/src/render/Renderer.cpp b/src/render/Renderer.cpp 
index 4297ec32..0f39c4db 100644 --- a/src/render/Renderer.cpp +++ b/src/render/Renderer.cpp @@ -131,7 +131,10 @@ void blitFinalComposite( } // namespace Renderer::Renderer() noexcept -: m_lightClustering{OwningPtr{gAllocators.general}} +: m_meshletCuller{OwningPtr{gAllocators.general}} +, m_hierarchicalDepthDownsampler{OwningPtr{ + gAllocators.general}} +, m_lightClustering{OwningPtr{gAllocators.general}} , m_forwardRenderer{OwningPtr{gAllocators.general}} , m_gbufferRenderer{OwningPtr{gAllocators.general}} , m_deferredShading{OwningPtr{gAllocators.general}} @@ -145,9 +148,7 @@ Renderer::Renderer() noexcept , m_depthOfField{OwningPtr{gAllocators.general}} , m_imageBasedLighting{OwningPtr{gAllocators.general}} , m_temporalAntiAliasing{OwningPtr{gAllocators.general}} -, m_meshletCuller{OwningPtr{gAllocators.general}} , m_textureReadback{OwningPtr{gAllocators.general}} -, m_hizDownsampler{OwningPtr{gAllocators.general}} { } @@ -160,6 +161,9 @@ void Renderer::init( vk::DescriptorSetLayout camDsLayout, const WorldDSLayouts &worldDsLayouts) { const Timer gpuPassesInitTimer; + m_meshletCuller->init( + scopeAlloc.child_scope(), worldDsLayouts, camDsLayout); + m_hierarchicalDepthDownsampler->init(scopeAlloc.child_scope()); m_lightClustering->init( scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); m_forwardRenderer->init( @@ -168,9 +172,11 @@ void Renderer::init( .camera = camDsLayout, .lightClusters = m_lightClustering->descriptorSetLayout(), .world = worldDsLayouts, - }); + }, + m_meshletCuller.get(), m_hierarchicalDepthDownsampler.get()); m_gbufferRenderer->init( - scopeAlloc.child_scope(), camDsLayout, worldDsLayouts); + scopeAlloc.child_scope(), camDsLayout, worldDsLayouts, + m_meshletCuller.get(), m_hierarchicalDepthDownsampler.get()); m_deferredShading->init( scopeAlloc.child_scope(), DeferredShading::InputDSLayouts{ @@ -190,10 +196,7 @@ void Renderer::init( m_depthOfField->init(scopeAlloc.child_scope(), camDsLayout); 
m_imageBasedLighting->init(scopeAlloc.child_scope()); m_temporalAntiAliasing->init(scopeAlloc.child_scope(), camDsLayout); - m_meshletCuller->init( - scopeAlloc.child_scope(), worldDsLayouts, camDsLayout); m_textureReadback->init(scopeAlloc.child_scope()); - m_hizDownsampler->init(scopeAlloc.child_scope()); LOG_INFO("GPU pass init took %.2fs", gpuPassesInitTimer.getSeconds()); } @@ -201,6 +204,8 @@ void Renderer::startFrame() { gRenderResources.startFrame(); m_meshletCuller->startFrame(); + m_hierarchicalDepthDownsampler->startFrame(); + m_forwardRenderer->startFrame(); m_depthOfField->startFrame(); m_textureReadback->startFrame(); @@ -255,7 +260,8 @@ void Renderer::recompileShaders( scopeAlloc.child_scope(), changedFiles, camDsLayout); m_meshletCuller->recompileShaders( scopeAlloc.child_scope(), changedFiles, worldDsLayouts, camDsLayout); - m_hizDownsampler->recompileShaders(scopeAlloc.child_scope(), changedFiles); + m_hierarchicalDepthDownsampler->recompileShaders( + scopeAlloc.child_scope(), changedFiles); LOG_INFO("Shaders recompiled in %.2fs", t.getSeconds()); } @@ -375,6 +381,8 @@ void Renderer::render( ImageHandle illumination; if (m_referenceRt) { + m_forwardRenderer->releasePreserved(); + m_gbufferRenderer->releasePreserved(); m_rtDirectIllumination->releasePreserved(); m_temporalAntiAliasing->releasePreserved(); @@ -401,20 +409,11 @@ void Renderer::render( // Opaque if (m_renderDeferred) { - Optional prevHierarchicalDepth; - if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) - prevHierarchicalDepth = m_prevHierarchicalDepth; + m_forwardRenderer->releasePreserved(); const GBufferRendererOutput gbuffer = m_gbufferRenderer->record( - scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, cam, - renderArea, prevHierarchicalDepth, gpuDrawStats, m_drawType, - nextFrame, &drawStats); - - if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) - gRenderResources.images->release(m_prevHierarchicalDepth); - 
m_prevHierarchicalDepth = m_hizDownsampler->record( - scopeAlloc.child_scope(), cb, gbuffer.depth, nextFrame); - gRenderResources.images->preserve(m_prevHierarchicalDepth); + scopeAlloc.child_scope(), cb, world, cam, renderArea, + gpuDrawStats, m_drawType, nextFrame, &drawStats); if (m_deferredRt) illumination = @@ -446,24 +445,14 @@ void Renderer::render( } else { + m_gbufferRenderer->releasePreserved(); m_rtDirectIllumination->releasePreserved(); - Optional prevHierarchicalDepth; - if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) - prevHierarchicalDepth = m_prevHierarchicalDepth; - const ForwardRenderer::OpaqueOutput output = m_forwardRenderer->recordOpaque( - scopeAlloc.child_scope(), cb, m_meshletCuller.get(), world, - cam, renderArea, lightClusters, prevHierarchicalDepth, - gpuDrawStats, nextFrame, m_applyIbl, m_drawType, - &drawStats); - - if (gRenderResources.images->isValidHandle(m_prevHierarchicalDepth)) - gRenderResources.images->release(m_prevHierarchicalDepth); - m_prevHierarchicalDepth = m_hizDownsampler->record( - scopeAlloc.child_scope(), cb, output.depth, nextFrame); - gRenderResources.images->preserve(m_prevHierarchicalDepth); + scopeAlloc.child_scope(), cb, world, cam, renderArea, + lightClusters, gpuDrawStats, nextFrame, m_applyIbl, + m_drawType, &drawStats); illumination = output.illumination; velocity = output.velocity; diff --git a/src/render/Renderer.hpp b/src/render/Renderer.hpp index 7385e25a..28230625 100644 --- a/src/render/Renderer.hpp +++ b/src/render/Renderer.hpp @@ -74,6 +74,9 @@ class Renderer void readbackDrawStats( vk::CommandBuffer cb, uint32_t nextFrame, BufferHandle srcBuffer); + wheels::OwningPtr m_meshletCuller; + wheels::OwningPtr + m_hierarchicalDepthDownsampler; wheels::OwningPtr m_lightClustering; wheels::OwningPtr m_forwardRenderer; wheels::OwningPtr m_gbufferRenderer; @@ -88,15 +91,11 @@ class Renderer wheels::OwningPtr m_depthOfField; wheels::OwningPtr m_imageBasedLighting; wheels::OwningPtr 
m_temporalAntiAliasing; - wheels::OwningPtr m_meshletCuller; wheels::OwningPtr m_textureReadback; - wheels::OwningPtr m_hizDownsampler; wheels::StaticArray m_drawStats; wheels::StaticArray m_gpuDrawStats; - ImageHandle m_prevHierarchicalDepth; - vk::Extent2D m_viewportExtentInUi{}; bool m_textureDebugActive{false}; From 2b2e82fbd1a9f5f08d9ab8d7bd4ec5e736a8e8d7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Thu, 1 Aug 2024 17:29:44 +0300 Subject: [PATCH 11/12] Fix increment UB Can't modify a value twice between sequence points. --- src/render/RtReference.cpp | 2 +- src/render/rtdi/RtDiInitialReservoirs.cpp | 2 +- src/render/rtdi/RtDiTrace.cpp | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/render/RtReference.cpp b/src/render/RtReference.cpp index 4c8b6325..e2662154 100644 --- a/src/render/RtReference.cpp +++ b/src/render/RtReference.cpp @@ -176,7 +176,7 @@ RtReference::Output RtReference::record( PROFILER_CPU_SCOPE("RtReference"); - m_frameIndex = ++m_frameIndex % sFramePeriod; + m_frameIndex = (m_frameIndex + 1) % sFramePeriod; Output ret; { diff --git a/src/render/rtdi/RtDiInitialReservoirs.cpp b/src/render/rtdi/RtDiInitialReservoirs.cpp index 53fb8a22..b9a65a03 100644 --- a/src/render/rtdi/RtDiInitialReservoirs.cpp +++ b/src/render/rtdi/RtDiInitialReservoirs.cpp @@ -104,7 +104,7 @@ RtDiInitialReservoirs::Output RtDiInitialReservoirs::record( PROFILER_CPU_SCOPE(" InitialReservoirs"); - m_frameIndex = ++m_frameIndex % sFramePeriod; + m_frameIndex = (m_frameIndex + 1) % sFramePeriod; Output ret; { diff --git a/src/render/rtdi/RtDiTrace.cpp b/src/render/rtdi/RtDiTrace.cpp index 73110cb8..c4ba89b5 100644 --- a/src/render/rtdi/RtDiTrace.cpp +++ b/src/render/rtdi/RtDiTrace.cpp @@ -149,7 +149,7 @@ RtDiTrace::Output RtDiTrace::record( PROFILER_CPU_SCOPE(" Trace"); - m_frameIndex = ++m_frameIndex % sFramePeriod; + m_frameIndex = (m_frameIndex + 1) % sFramePeriod; Output ret; { From 
608b198e80a3acf3679d97375c688bb9717df7e7 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Santeri=20Salmij=C3=A4rvi?= Date: Thu, 1 Aug 2024 17:33:39 +0300 Subject: [PATCH 12/12] vscode: Use mold on linux Link times go from almost 10s down to 1.5s --- .vscode/tasks.json | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/.vscode/tasks.json b/.vscode/tasks.json index d9f73545..3c9cff72 100644 --- a/.vscode/tasks.json +++ b/.vscode/tasks.json @@ -281,6 +281,8 @@ "-DCMAKE_BUILD_TYPE=RelWithDebInfo", // Ninja color output "-DCMAKE_CXX_FLAGS=-fdiagnostics-color=always", + // mold is significantly faster than the default linkers + "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=mold", "-G", "Ninja", "-B", @@ -305,6 +307,8 @@ "-DCMAKE_BUILD_TYPE=Debug", // Ninja color output "-DCMAKE_CXX_FLAGS=-fdiagnostics-color=always", + // mold is significantly faster than the default linkers + "-DCMAKE_EXE_LINKER_FLAGS=-fuse-ld=mold", "-G", "Ninja", "-B",