From 18984d834638e74cf680ca0f8f675e645cc1c774 Mon Sep 17 00:00:00 2001 From: Try Date: Wed, 25 Sep 2024 01:05:23 +0200 Subject: [PATCH] vsm in progress #681 --- game/graphics/drawcommands.cpp | 1 + game/graphics/renderer.cpp | 15 +- game/graphics/renderer.h | 3 + game/graphics/sceneglobals.cpp | 2 + game/graphics/sceneglobals.h | 2 + game/graphics/worldview.cpp | 3 +- game/graphics/worldview.h | 5 +- shader/virtual_shadow/vsm_alloc_pages.comp | 2 +- shader/virtual_shadow/vsm_clear.comp | 3 + shader/virtual_shadow/vsm_clump_pages.comp | 158 +++++++++++--------- shader/virtual_shadow/vsm_cluster_task.comp | 8 +- shader/virtual_shadow/vsm_mark_pages.comp | 47 +++++- 12 files changed, 164 insertions(+), 85 deletions(-) diff --git a/game/graphics/drawcommands.cpp b/game/graphics/drawcommands.cpp index 21c5c7d8..6417003a 100644 --- a/game/graphics/drawcommands.cpp +++ b/game/graphics/drawcommands.cpp @@ -235,6 +235,7 @@ void DrawCommands::updateTasksUniforms() { } else { i.desc.set(T_Scene, scene.uboGlobal[i.viewport]); i.desc.set(T_Payload, views[i.viewport].vsmClusters); + i.desc.set(T_HiZ, *scene.vsmPageHiZ); i.desc.set(T_VsmPages, *scene.vsmPageList); // i.desc.set(T_PkgOffsets, views[i.viewport].pkgOffsets); } diff --git a/game/graphics/renderer.cpp b/game/graphics/renderer.cpp index 5e367bfd..798ea05f 100644 --- a/game/graphics/renderer.cpp +++ b/game/graphics/renderer.cpp @@ -204,8 +204,9 @@ void Renderer::resetSwapchain() { vsm.pageDbg = device.image2d(TextureFormat::R32U, 32, 32); vsm.pageTbl = device.image3d(TextureFormat::R32U, 32, 32, 16); - // vsm.pageDataCs = device.image2d(TextureFormat::R32U, 4096, 4096); + vsm.pageHiZ = device.image3d(TextureFormat::R32U, 32, 32, 16); vsm.pageData = device.zbuffer(shadowFormat, 4096, 4096); + // vsm.pageDataCs = device.image2d(TextureFormat::R32U, 4096, 4096); const int32_t VSM_PAGE_SIZE = 128; auto pageCount = uint32_t((vsm.pageData.w()+VSM_PAGE_SIZE-1)/VSM_PAGE_SIZE) * uint32_t((vsm.pageData.h()+VSM_PAGE_SIZE-1)/VSM_PAGE_SIZE); @@ -451,6 +452,7 @@ void Renderer::prepareUniforms() { if(settings.vsmEnabled) { vsm.uboClear.set(0, vsm.pageList); vsm.uboClear.set(1, vsm.pageTbl); + vsm.uboClear.set(2, vsm.pageHiZ); if(!vsm.uboClearPages.isEmpty()) vsm.uboClearPages.set(0, vsm.pageDataCs); @@ -460,6 +462,8 @@ void Renderer::prepareUniforms() { vsm.uboPages.set(2, gbufNormal, Sampler::nearest()); vsm.uboPages.set(3, zbuffer, Sampler::nearest()); vsm.uboPages.set(4, vsm.pageTbl); + vsm.uboPages.set(6, vsm.pageHiZ); + //vsm.uboPages.set(7, vsm.pageList); vsm.uboClump.set(0, vsm.pageList); vsm.uboClump.set(1, vsm.pageTbl); @@ -476,6 +480,7 @@ void Renderer::prepareUniforms() { if(!vsm.pageDataCs.isEmpty()) vsm.uboLight.set(5, vsm.pageDataCs); else vsm.uboLight.set(5, vsm.pageData); + vsm.uboLight.set(7, vsm.pageList); vsm.uboDbg.set(0, wview->sceneGlobals().uboGlobal[SceneGlobals::V_Main]); vsm.uboDbg.set(1, gbufDiffuse, Sampler::nearest()); @@ -499,7 +504,7 @@ void Renderer::prepareUniforms() { sh[i] = &textureCast(shadowMap[i]); } wview->setShadowMaps(sh); - wview->setVirtualShadowMap(vsm.pageDataCs, vsm.pageTbl, vsm.pageList); + wview->setVirtualShadowMap(vsm.pageDataCs, vsm.pageTbl, vsm.pageHiZ, vsm.pageList); wview->setSwRenderingImage(swr.outputImage); wview->setHiZ(textureCast(hiz.hiZ)); @@ -774,7 +779,7 @@ void Renderer::drawVsmDbg(Tempest::Encoder& cmd, uint8_t cmd.setFramebuffer({{sceneLinear, Tempest::Preserve, Tempest::Preserve}}); cmd.setDebugMarker("VSM-dbg"); - cmd.setUniforms(*vsm.pagesDbgPso, vsm.uboDbg); + cmd.setUniforms(*vsm.pagesDbgPso, vsm.uboDbg, &settings.vsmMipBias, sizeof(settings.vsmMipBias)); cmd.draw(Resources::fsqVbo()); } @@ -864,7 +869,7 @@ void Renderer::drawVsm(Tempest::Encoder& cmd, uint8_t fI cmd.setUniforms(shaders.vsmClear, vsm.uboClear); cmd.dispatchThreads(size_t(vsm.pageTbl.w()), size_t(vsm.pageTbl.h()), size_t(vsm.pageTbl.d())); - cmd.setUniforms(shaders.vsmMarkPages, vsm.uboPages); + cmd.setUniforms(shaders.vsmMarkPages, vsm.uboPages, &settings.vsmMipBias, sizeof(settings.vsmMipBias)); cmd.dispatchThreads(zbuffer.size()); if(vsm.pageDataCs.isEmpty()) { @@ -961,7 +966,7 @@ void Renderer::drawShadowResolve(Encoder& cmd, uint8_t fId, const return; if(settings.vsmEnabled) { cmd.setDebugMarker("DirectSunLight-VSM"); - cmd.setUniforms(*vsm.directLightPso, vsm.uboLight); + cmd.setUniforms(*vsm.directLightPso, vsm.uboLight, &settings.vsmMipBias, sizeof(settings.vsmMipBias)); cmd.draw(Resources::fsqVbo()); return; } diff --git a/game/graphics/renderer.h b/game/graphics/renderer.h index f61c4e57..8e9dd7b3 100644 --- a/game/graphics/renderer.h +++ b/game/graphics/renderer.h @@ -87,6 +87,8 @@ class Renderer final { float zVidGamma = 0.5; float vidResIndex = 0; + + float vsmMipBias = -0.25; } settings; Frustrum frustrum[SceneGlobals::V_Count]; @@ -225,6 +227,7 @@ class Renderer final { Tempest::StorageImage pageDbg; Tempest::StorageImage pageTbl; + Tempest::StorageImage pageHiZ; Tempest::StorageImage pageDataCs; Tempest::ZBuffer pageData; Tempest::StorageBuffer pageList; diff --git a/game/graphics/sceneglobals.cpp b/game/graphics/sceneglobals.cpp index 94cd6cc4..1af219f4 100644 --- a/game/graphics/sceneglobals.cpp +++ b/game/graphics/sceneglobals.cpp @@ -213,9 +213,11 @@ void SceneGlobals::setShadowMap(const Tempest::Texture2d* tex[]) { void SceneGlobals::setVirtualShadowMap(const Tempest::StorageImage& pageData, const Tempest::StorageImage& pageTbl, + const Tempest::StorageImage& pageHiZ, const Tempest::StorageBuffer& pageList) { vsmPageData = &pageData; vsmPageTbl = &pageTbl; + vsmPageHiZ = &pageHiZ; vsmPageList = &pageList; } diff --git a/game/graphics/sceneglobals.h b/game/graphics/sceneglobals.h index 5439c746..deb4384f 100644 --- a/game/graphics/sceneglobals.h +++ b/game/graphics/sceneglobals.h @@ -44,6 +44,7 @@ class SceneGlobals final { void setVirtualShadowMap(const Tempest::StorageImage& vsmPageData, const Tempest::StorageImage& pageTbl, + const Tempest::StorageImage& pageHiZ, const Tempest::StorageBuffer& vsmPageList); void setSwRenderingImage(const Tempest::StorageImage& mainView); @@ -77,6 +78,7 @@ class SceneGlobals final { const Tempest::StorageImage* vsmPageData = nullptr; const Tempest::StorageImage* vsmPageTbl = nullptr; + const Tempest::StorageImage* vsmPageHiZ = nullptr; const Tempest::StorageBuffer* vsmPageList = nullptr; const Tempest::StorageImage* swMainImage = nullptr; diff --git a/game/graphics/worldview.cpp b/game/graphics/worldview.cpp index 03c4b07d..9846f435 100644 --- a/game/graphics/worldview.cpp +++ b/game/graphics/worldview.cpp @@ -87,8 +87,9 @@ void WorldView::setShadowMaps(const Tempest::Texture2d* sh[]) { void WorldView::setVirtualShadowMap(const Tempest::StorageImage& pageData, const Tempest::StorageImage& pageTbl, + const Tempest::StorageImage& pageHiZ, const Tempest::StorageBuffer& pageList) { - sGlobal.setVirtualShadowMap(pageData, pageTbl, pageList); + sGlobal.setVirtualShadowMap(pageData, pageTbl, pageHiZ, pageList); } void WorldView::setSwRenderingImage(const Tempest::StorageImage& mainView) { diff --git a/game/graphics/worldview.h b/game/graphics/worldview.h index d04dcc42..1909e1c4 100644 --- a/game/graphics/worldview.h +++ b/game/graphics/worldview.h @@ -37,7 +37,10 @@ class WorldView { void setGbuffer(const Tempest::Texture2d& diffuse, const Tempest::Texture2d& norm); void setShadowMaps (const Tempest::Texture2d* shadow[]); - void setVirtualShadowMap(const Tempest::StorageImage& pageData, const Tempest::StorageImage& pageTbl, const Tempest::StorageBuffer& pageList); + void setVirtualShadowMap(const Tempest::StorageImage& pageData, + const Tempest::StorageImage& pageTbl, + const Tempest::StorageImage& pageHiZ, + const Tempest::StorageBuffer& pageList); void setSwRenderingImage(const Tempest::StorageImage& mainView); void setHiZ(const Tempest::Texture2d& hiZ); void setSceneImages(const Tempest::Texture2d& clr, const Tempest::Texture2d& depthAux, const Tempest::ZBuffer& depthNative); diff --git a/shader/virtual_shadow/vsm_alloc_pages.comp b/shader/virtual_shadow/vsm_alloc_pages.comp index 1d8d0a3e..ee5f0b79 100644 --- a/shader/virtual_shadow/vsm_alloc_pages.comp +++ b/shader/virtual_shadow/vsm_alloc_pages.comp @@ -243,5 +243,5 @@ void main() { layoutPages(frag, size, 2); layoutPages(frag, size, 1); - horizontalMerge(); + //horizontalMerge(); } diff --git a/shader/virtual_shadow/vsm_clear.comp b/shader/virtual_shadow/vsm_clear.comp index 099981cd..8f5b4e06 100644 --- a/shader/virtual_shadow/vsm_clear.comp +++ b/shader/virtual_shadow/vsm_clear.comp @@ -14,6 +14,7 @@ layout(binding = 0, std430) buffer CB0 { uint pageList[]; }; layout(binding = 1, r32ui) uniform uimage3D pageTbl; +layout(binding = 2, r32ui) uniform uimage3D pageTblDepth; void main() { const ivec3 at = ivec3(gl_GlobalInvocationID); @@ -22,6 +23,8 @@ void main() { return; imageStore(pageTbl, at, uvec4(0)); + imageStore(pageTblDepth, at, uvec4(0xFFFFFFFF)); + if(at==ivec3(0)) { header.pageCount = 0; header.meshletCount = 0; diff --git a/shader/virtual_shadow/vsm_clump_pages.comp b/shader/virtual_shadow/vsm_clump_pages.comp index 41a4d0dd..a2482262 100644 --- a/shader/virtual_shadow/vsm_clump_pages.comp +++ b/shader/virtual_shadow/vsm_clump_pages.comp @@ -14,32 +14,59 @@ layout(binding = 0, std430) buffer Pages { VsmHeader header; uint pageList[]; } layout(binding = 1, r32ui) uniform uimage3D pageTbl; shared uint minY; -shared uint minX [32/2]; -shared uint sWidth[32/2]; +shared uint minX [gl_WorkGroupSize.y/4]; +shared uint sWidth[gl_WorkGroupSize.y/4]; +shared uint data[gl_WorkGroupSize.x][gl_WorkGroupSize.y]; -void storePage(ivec3 at, uint samples, uint size) { - uint d = (samples & 0xFF) | (size << 8); - imageStore(pageTbl, at, uvec4(d)); +uint packPage(uint samples, uint size, uint prime) { + return (samples & 0xFF) | (size << 8) | (prime << 16); } -uvec2 loadPage(ivec3 at) { - uint a = imageLoad(pageTbl, at).r; - return uvec2(a&0xFF, a>>8); +uint packPage(uint samples, uint size) { + return (samples & 0xFF) | (size << 8); } -uint loadPagePrimeSize(ivec3 at) { - uint a = imageLoad(pageTbl, at).r; - return a >> 1; +uint unpackSize(uint a){ + return (a >> 8) & 0xFF; } -uint loadPagePrimeBit(ivec3 at) { - uint a = imageLoad(pageTbl, at).r; - return a & 0x1; +uint unpackSamples(uint a){ + return a&0xFF; } -void storePagePrime(ivec3 at, uint size, uint prime) { - uint d = (size << 1) | prime&0x1; - imageStore(pageTbl, at, uvec4(d)); +uint unpackPrimeBit(uint a){ + return (a >> 16) & 0x1; + } + +void poolPageTable() { + const ivec3 size = imageSize(pageTbl); + const ivec3 at = ivec3(gl_GlobalInvocationID); + const ivec3 id = ivec3(gl_LocalInvocationID); + + if(all(lessThan(at, size))) { + const uint frag = imageLoad(pageTbl, at).r; + //data[id.x][id.y] = frag; + data[at.x][at.y] = packPage(frag, frag, frag); + } else { + data[id.x][id.y] = 0; + } + barrier(); + } + +void storePageTable() { + barrier(); + + const ivec3 size = imageSize(pageTbl); + const ivec3 at = ivec3(gl_GlobalInvocationID); + const ivec3 id = ivec3(gl_LocalInvocationID); + + if(all(lessThan(at, size))) { + uint d = data[at.x][at.y]; + uint p = unpackPrimeBit(d); + //uint s = p>0 ? unpackSize(d) : 0; + uint s = unpackSize(d); + imageStore(pageTbl, at, uvec4((s<<1) | p) ); + } } void main() { @@ -48,6 +75,8 @@ void main() { const ivec3 id = ivec3(gl_LocalInvocationID); const uint lane = gl_LocalInvocationIndex; + poolPageTable(); + if(lane < minX.length()) { minX [lane] = 0xFFFF; sWidth[lane] = 0; @@ -55,10 +84,9 @@ void main() { minY = 0xFFFF; barrier(); - const uint frag = imageLoad(pageTbl, at).r; + const uint frag = data[at.x][at.y]; const bool prime = frag>0; - - if(frag>0) { + if(prime) { atomicMin(minY, at.y); atomicMin(vsm.header.pageBbox[at.z].x, at.x); @@ -68,85 +96,73 @@ void main() { } barrier(); - const uint stripe = (at.y-minY)/2; + const uint stripe = (at.y-minY)/4; if(frag>0) atomicMin(minX[stripe], at.x); barrier(); - if(stripe%2==1 && minX[stripe-1]%2!=minX[stripe]%2) { - if(minX[stripe-1] > minX[stripe]) - minX[stripe-1]--; else - minX[stripe-0]--; - } + const ivec2 tileCoord = ivec2(at.x - minX[stripe], at.y-minY); + if(frag>0) + atomicMax(sWidth[stripe], tileCoord.x + 1); barrier(); - ivec2 tileCoord = ivec2(at.x - minX[stripe], at.y-minY); - if(frag>0) { - uint w = tileCoord.x + 1; - w += w%2; - atomicMax(sWidth[stripe], w); + const bool valid = (0<=tileCoord.x && tileCoord.x4 ? 2 : 3)*2; - uint a = loadPage(at+ivec3(0,0,0)).x; - uint b = loadPage(at+ivec3(2,0,0)).x; - uint c = loadPage(at+ivec3(2,2,0)).x; - uint d = loadPage(at+ivec3(0,2,0)).x; + uint a = unpackSamples(data[at.x+0][at.y+0]); + uint b = unpackSamples(data[at.x+2][at.y+0]); + uint c = unpackSamples(data[at.x+2][at.y+2]); + uint d = unpackSamples(data[at.x+0][at.y+2]); uint s = (a+b+c+d); if(a+b+c+d >= mergeQ) { - storePage(at+ivec3(0,0,0), s, 4); - storePage(at+ivec3(2,0,0), 0, 0); - storePage(at+ivec3(0,2,0), 0, 0); - storePage(at+ivec3(2,2,0), 0, 0); + data[at.x+0][at.y+0] = packPage(s, 4); + data[at.x+2][at.y+0] = packPage(0, 0); + data[at.x+0][at.y+2] = packPage(0, 0); + data[at.x+2][at.y+2] = packPage(0, 0); } } + barrier(); // prime bit - memoryBarrierImage(); - barrier(); { - uint data = loadPage(at).y; - data = (data << 1) | (prime ? 1 : 0); - imageStore(pageTbl, at, uvec4(data)); + uint d = unpackSamples(data[at.x][at.y]); + uint s = unpackSize (data[at.x][at.y]); + data[at.x][at.y] = packPage(d ,s, prime ? 1 : 0); } - - // demote unfilled quads to 1x1 pages - memoryBarrierImage(); barrier(); + // demote unfilled quads to 1x1 pages if(true && quad) { - uint s = loadPagePrimeSize(at); - uint a = loadPagePrimeBit (at+ivec3(0,0,0)); - uint b = loadPagePrimeBit (at+ivec3(1,0,0)); - uint c = loadPagePrimeBit (at+ivec3(1,1,0)); - uint d = loadPagePrimeBit (at+ivec3(0,1,0)); + uint s = unpackSize (data[at.x][at.y]); + uint a = unpackPrimeBit(data[at.x+0][at.y+0]); + uint b = unpackPrimeBit(data[at.x+1][at.y+0]); + uint c = unpackPrimeBit(data[at.x+1][at.y+1]); + uint d = unpackPrimeBit(data[at.x+0][at.y+1]); if(s==2 && a+b+c+d <= 2) { - storePagePrime(at+ivec3(0,0,0), a, a); - storePagePrime(at+ivec3(1,0,0), b, b); - storePagePrime(at+ivec3(1,1,0), c, c); - storePagePrime(at+ivec3(0,1,0), d, d); + data[at.x+0][at.y+0] = packPage(1, a, a); + data[at.x+1][at.y+0] = packPage(1, b, b); + data[at.x+1][at.y+1] = packPage(1, c, c); + data[at.x+0][at.y+1] = packPage(1, d, d); } } - } + storePageTable(); + } /* void trimMip(int mip) { diff --git a/shader/virtual_shadow/vsm_cluster_task.comp b/shader/virtual_shadow/vsm_cluster_task.comp index 76beb549..fa47c3e7 100644 --- a/shader/virtual_shadow/vsm_cluster_task.comp +++ b/shader/virtual_shadow/vsm_cluster_task.comp @@ -64,7 +64,8 @@ bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMin) { float Rx = R * smWidthInv; float Ry = R * smWidthInv; - aabb = vec4(c.xy-vec2(Rx,Ry), c.xy+vec2(Rx,Ry)); + aabb = vec4(c.xy-vec2(Rx,Ry), c.xy+vec2(Rx,Ry)); + depthMin = c.z + Rz; // aabb = c.xyxy; return true; } @@ -181,7 +182,6 @@ void _runCluster(const uint clusterId) { continue; if(!pageBoundsTest(aabb, page, sz)) continue; - const uint p = imageLoad(pageTbl, ivec3(page)).x; #if !defined(VSM_ATOMIC) if(!emitCluster(cluster, i)) break; @@ -261,6 +261,10 @@ void runCluster(const uint clusterId) { const ivec2 sz = unpackVsmPageSize(data); if(!pageBoundsTest(aabb, page, sz)) continue; + const uint p = imageLoad(pageTbl, ivec3(page)).x; + const float f = uintBitsToFloat(p); + if(depthMin < f) + ;//continue; if(!emitCluster(cluster, i)) break; } diff --git a/shader/virtual_shadow/vsm_mark_pages.comp b/shader/virtual_shadow/vsm_mark_pages.comp index 40012832..c1b5cdc2 100644 --- a/shader/virtual_shadow/vsm_mark_pages.comp +++ b/shader/virtual_shadow/vsm_mark_pages.comp @@ -26,6 +26,9 @@ const vec3 debugColors[] = { vec3(0,0.5,1), }; +layout(std140, push_constant) uniform Push { + float vsmMipBias; + }; layout(binding = 0, std140) uniform UboScene { SceneDesc scene; }; @@ -42,6 +45,12 @@ layout(binding = 5) uniform utexture2D pageData; layout(binding = 5) uniform texture2D pageData; # endif #endif +layout(binding = 6, r32ui) uniform uimage3D pageTblDepth; + +#if defined(COMPOSE) || defined(DEBUG) +layout(binding = 7, std430) readonly buffer Pages { VsmHeader header; uint pageList[]; } vsm; +#endif + #if defined(COMPOSE) || defined(DEBUG) layout(location = 0) out vec4 outColor; @@ -53,6 +62,30 @@ ivec2 pixelPos = ivec2(gl_GlobalInvocationID.xy); ivec2 pixelPos = ivec2(gl_FragCoord.xy); #endif +float drawInt(in vec2 where, in int n) { + const float RESOLUTION = 0.5; + int i=int((where*=RESOLUTION).y); + if(00 || n>0; n/=10) + if ((j+=4)<3 && j>=0) { + int x = 0; + if(i>4) + x = 972980223; + else if(i>3) + x = 690407533; + else if(i>2) + x = 704642687; + else if(i>1) + x = 696556137; + else + x = 972881535; + return float(x >> (29-j-(n%10)*3)&1); + } + } + return 0; + } + uint hash(uvec3 gridPos) { return (gridPos.x * 18397) + (gridPos.y * 20483) + (gridPos.z * 29303); } @@ -84,8 +117,9 @@ int shadowLod(vec2 dx, vec2 dy) { float maxLod = 0.5 * log2(max(px, py)); // log2(sqrt()) = 0.5*log2() float minLod = 0.5 * log2(min(px, py)); - const float bias = -1; - return max(0, int((minLod + maxLod)*0.5 + bias + 0.5)); + const float bias = vsmMipBias; + //return max(0, int((minLod + maxLod)*0.5 + bias + 0.5)); + return max(0, int(minLod + bias + 0.5)); } float shadowTexelFetch(vec2 page, int mip) { @@ -119,10 +153,11 @@ float shadowTest(vec2 page, int mip, in float refZ, bool isATest) { return z