From 4e096ffaa2bb0231d2a5764858e3e0d292dd4d69 Mon Sep 17 00:00:00 2001 From: Try Date: Tue, 17 Sep 2024 18:58:21 +0200 Subject: [PATCH] vsm: configurable page size; sw-shadows initial #681 --- game/camera.cpp | 2 +- game/graphics/drawcommands.cpp | 33 +- game/graphics/drawcommands.h | 2 + game/graphics/renderer.cpp | 4 +- game/graphics/shaders.cpp | 2 +- game/graphics/shaders.h | 2 + shader/materials/materials_common.glsl | 22 +- shader/software_rendering/sw_light.comp | 2 +- shader/virtual_shadow/vsm_clump_pages.comp | 65 +--- shader/virtual_shadow/vsm_cluster_task.comp | 2 +- shader/virtual_shadow/vsm_common.glsl | 6 +- shader/virtual_shadow/vsm_rendering.comp | 400 ++++++++++---------- shader/virtual_shadow/vsm_sort_pages.comp | 24 +- 13 files changed, 267 insertions(+), 299 deletions(-) diff --git a/game/camera.cpp b/game/camera.cpp index 867eb31c..855f7934 100644 --- a/game/camera.cpp +++ b/game/camera.cpp @@ -281,7 +281,7 @@ Matrix4x4 Camera::viewShadowVsmLwc(const Tempest::Vec3& ldir) const { Matrix4x4 Camera::mkViewShadowVsm(const Vec3& cameraPos, const Vec3& ldir) const { float smWidth = 1024; // ~4 pixels per santimeter - float smDepth = 5*5120; + float smDepth = 10*5120; float smWidthInv = 1.f/smWidth; float zScale = 1.f/smDepth; diff --git a/game/graphics/drawcommands.cpp b/game/graphics/drawcommands.cpp index d85c2c90..9c92ad2b 100644 --- a/game/graphics/drawcommands.cpp +++ b/game/graphics/drawcommands.cpp @@ -69,6 +69,8 @@ DrawCommands::DrawCommands(VisualObjects& owner, DrawBuckets& buckets, DrawClust Tempest::DispatchIndirectCommand cmd = {2000,1,1}; vsmIndirectCmd = Resources::device().ssbo(&cmd, sizeof(cmd)); } + // vsmSwrImage = Resources::device().image2d(TextureFormat::R16, 4096, 4096); + // vsmSwrImage = Resources::device().image2d(TextureFormat::R32U, 4096, 4096); } DrawCommands::~DrawCommands() { @@ -349,10 +351,7 @@ void DrawCommands::updateCommandUniforms() { } void DrawCommands::updateVsmUniforms() { - if(Gothic::options().swRenderingPreset==0) - return; - - if(scene.swMainImage==nullptr) + if(Gothic::options().swRenderingPreset==0 && Gothic::options().doVirtualShadow==false) return; auto& device = Resources::device(); @@ -377,7 +376,7 @@ void DrawCommands::updateVsmUniforms() { } const uint32_t preset = Gothic::options().swRenderingPreset; - if(preset>0 && !Shaders::inst().swRendering.isEmpty()) { + if(preset>0 && !Shaders::inst().swRendering.isEmpty() && scene.swMainImage!=nullptr) { Resources::recycle(std::move(swrDesc)); swrDesc = device.descriptors(Shaders::inst().swRendering); swrDesc.set(0, *scene.swMainImage); @@ -394,6 +393,21 @@ void DrawCommands::updateVsmUniforms() { swrDesc.set(10, owner.instanceSsbo()); } } + + if(false && Gothic::options().doVirtualShadow && scene.vsmPageList!=nullptr) { + Resources::recycle(std::move(vsmDesc)); + vsmDesc = device.descriptors(Shaders::inst().vsmRendering); + + vsmDesc.set(0, vsmSwrImage); + vsmDesc.set(1, scene.uboGlobal[SceneGlobals::V_Vsm]); + vsmDesc.set(2, *scene.vsmPageList); + vsmDesc.set(3, clusters.ssbo()); + vsmDesc.set(4, owner.instanceSsbo()); + vsmDesc.set(5, ibo); + vsmDesc.set(6, vbo); + vsmDesc.set(7, tex); + vsmDesc.set(8, Sampler::bilinear()); + } } void DrawCommands::prepareUniforms() { @@ -517,6 +531,15 @@ void DrawCommands::drawVsm(Tempest::Encoder& cmd, uint8_ cmd.dispatchMeshIndirect(view.indirectCmd, sizeof(IndirectCmd)*id + sizeof(uint32_t)); else cmd.drawIndirect(view.indirectCmd, sizeof(IndirectCmd)*id); } + + if(false) { + cmd.setFramebuffer({}); + struct Push { uint32_t meshletCount; } push = {}; + push.meshletCount = uint32_t(clusters.size()); + cmd.setUniforms(Shaders::inst().vsmRendering, vsmDesc, &push, sizeof(push)); + // const auto sz = Shaders::inst().vsmRendering.workGroupSize(); + cmd.dispatch(1024u); + } } void DrawCommands::drawHiZ(Tempest::Encoder& cmd, uint8_t fId) { diff --git a/game/graphics/drawcommands.h b/game/graphics/drawcommands.h index 5e60cadc..f7902e5e 100644 --- a/game/graphics/drawcommands.h +++ b/game/graphics/drawcommands.h @@ -147,4 +147,6 @@ class DrawCommands { View views[SceneGlobals::V_Count]; Tempest::StorageBuffer vsmIndirectCmd; + Tempest::StorageImage vsmSwrImage; + Tempest::DescriptorSet vsmDesc; }; diff --git a/game/graphics/renderer.cpp b/game/graphics/renderer.cpp index d7e6fa86..c91eefe5 100644 --- a/game/graphics/renderer.cpp +++ b/game/graphics/renderer.cpp @@ -207,7 +207,8 @@ void Renderer::resetSwapchain() { vsm.pageData = device.zbuffer(shadowFormat, 4096, 4096); vsm.shadowMask = device.image2d(Tempest::RGBA8, w, h); - auto pageCount = uint32_t((vsm.pageData.w()+128-1)/128) * uint32_t((vsm.pageData.h()+128-1)/128); + const int32_t VSM_PAGE_SIZE = 128; + auto pageCount = uint32_t((vsm.pageData.w()+VSM_PAGE_SIZE-1)/VSM_PAGE_SIZE) * uint32_t((vsm.pageData.h()+VSM_PAGE_SIZE-1)/VSM_PAGE_SIZE); vsm.pageList = device.ssbo(nullptr, (pageCount + 4)*sizeof(uint32_t)); } @@ -858,6 +859,7 @@ void Renderer::drawVsm(Tempest::Encoder& cmd, uint8_t fI cmd.setUniforms(*vsm.pagesMarkPso, vsm.uboPages); cmd.dispatchThreads(zbuffer.size()); + //TODO: trimming //cmd.setUniforms(Shaders::inst().vsmClumpPages0, vsm.uboList); //cmd.dispatch(1); diff --git a/game/graphics/shaders.cpp b/game/graphics/shaders.cpp index d7cd64c6..580d3870 100644 --- a/game/graphics/shaders.cpp +++ b/game/graphics/shaders.cpp @@ -206,7 +206,7 @@ Shaders::Shaders() { vsmPackDraw1 = computeShader("vsm_pack_draws1.comp.sprv"); vsmDirectLight = postEffect("copy", "direct_light_vsm", RenderState::ZTestMode::NoEqual); vsmDbg = postEffect("copy", "vsm_dbg", RenderState::ZTestMode::Always); - // vsmRendering = computeShader("vsm_rendering.comp.sprv"); + vsmRendering = computeShader("vsm_rendering.comp.sprv"); } if(Gothic::options().swRenderingPreset>0) { diff --git a/game/graphics/shaders.h b/game/graphics/shaders.h index 8ec004c0..b5df2a8b 100644 --- a/game/graphics/shaders.h +++ b/game/graphics/shaders.h @@ -83,6 +83,8 @@ class Shaders { Tempest::ComputePipeline vsmSortPages; Tempest::RenderPipeline vsmDbg; + Tempest::ComputePipeline vsmRendering; + // Software rendering Tempest::ComputePipeline swRendering; Tempest::RenderPipeline swRenderingDbg; diff --git a/shader/materials/materials_common.glsl b/shader/materials/materials_common.glsl index b719a77e..ec605dfd 100644 --- a/shader/materials/materials_common.glsl +++ b/shader/materials/materials_common.glsl @@ -38,17 +38,11 @@ const vec3 debugColors[MAX_DEBUG_COLORS] = { }; #endif -#define MAX_NUM_SKELETAL_NODES 96 -#define MAX_MORPH_LAYERS 4 -#define MaxVert 64 -#define MaxPrim 64 -#define MaxInd (MaxPrim*3) - -#define T_LANDSCAPE 0 -#define T_OBJ 1 -#define T_SKINING 2 -#define T_MORPH 3 -#define T_PFX 4 +const uint MAX_NUM_SKELETAL_NODES = 96; +const uint MAX_MORPH_LAYERS = 4; +const uint MaxVert = 64; +const uint MaxPrim = 64; +const uint MaxInd = MaxPrim*3; const uint L_Scene = 0; const uint L_Payload = 1; @@ -68,6 +62,12 @@ const uint L_GDepth = 13; const uint L_CmdOffsets = 14; const uint L_VsmPages = 15; +#define T_LANDSCAPE 0 +#define T_OBJ 1 +#define T_SKINING 2 +#define T_MORPH 3 +#define T_PFX 4 + #ifndef MESH_TYPE #define MESH_TYPE 255 #endif diff --git a/shader/software_rendering/sw_light.comp b/shader/software_rendering/sw_light.comp index 25461040..dd30dab4 100644 --- a/shader/software_rendering/sw_light.comp +++ b/shader/software_rendering/sw_light.comp @@ -68,7 +68,7 @@ layout(push_constant, std430) uniform UboPush { } push; //layout(binding = 0, rgba8) uniform image2D outputImage; -layout(binding = 0, r32ui) uniform uimage2D outputImage; +layout(binding = 0, r32ui) uniform uimage2D outputImage; layout(binding = 1, std140) uniform UboScene { SceneDesc scene; }; diff --git a/shader/virtual_shadow/vsm_clump_pages.comp b/shader/virtual_shadow/vsm_clump_pages.comp index 0994e1cc..48042391 100644 --- a/shader/virtual_shadow/vsm_clump_pages.comp +++ b/shader/virtual_shadow/vsm_clump_pages.comp @@ -36,7 +36,7 @@ void trimMip(int mip) { uint b = imageLoad(pageTbl, at+ivec3(1,0,0)).r; uint c = imageLoad(pageTbl, at+ivec3(0,1,0)).r; uint d = imageLoad(pageTbl, at+ivec3(1,1,0)).r; - if(a>0 && b>0 && c>0 && d>0) { + if(a+b+c+d == 4) { imageStore(pageTbl, ax, uvec4(0)); } } @@ -62,7 +62,7 @@ void mainGroups() { uint b = imageLoad(pageTbl, at+ivec3(1,0,0)).r; uint c = imageLoad(pageTbl, at+ivec3(1,1,0)).r; uint d = imageLoad(pageTbl, at+ivec3(0,1,0)).r; - if(a==1 && b==1 && c==1 && d==1) { + if(a+b+c+d >= 2) { imageStore(pageTbl, at+ivec3(0,0,0), uvec4(2)); imageStore(pageTbl, at+ivec3(1,0,0), uvec4(0)); imageStore(pageTbl, at+ivec3(0,1,0), uvec4(0)); @@ -77,7 +77,7 @@ void mainGroups() { uint b = imageLoad(pageTbl, at+ivec3(2,0,0)).r; uint c = imageLoad(pageTbl, at+ivec3(2,2,0)).r; uint d = imageLoad(pageTbl, at+ivec3(0,2,0)).r; - if(a==2 && b==2 && c==2 && d==2) { + if(a+b+c+d >= 3) { imageStore(pageTbl, at+ivec3(0,0,0), uvec4(4)); imageStore(pageTbl, at+ivec3(2,0,0), uvec4(0)); imageStore(pageTbl, at+ivec3(0,2,0), uvec4(0)); @@ -95,62 +95,3 @@ void main() { #error "invalid pass-id" #endif } - -/* -shared uint pageVal[gl_WorkGroupSize.x][gl_WorkGroupSize.y]; -void main2() { - const ivec3 size = imageSize(pageTbl); - const ivec3 at = ivec3(gl_GlobalInvocationID); - const ivec3 id = ivec3(gl_LocalInvocationID); - -#if 0 - if(at.z==1) { - uint pageS = 0; - if(all(lessThan(at, size))) - pageS = imageLoad(pageTbl, at).r; - - const int step = VSM_PAGE_TBL_SIZE/4; - if(all(lessThan(ivec2(step), at.xy)) && all(lessThan(at.xy, ivec2(VSM_PAGE_TBL_SIZE-step)))) { - ivec3 x = ivec3(ivec2(at.xy - step)*2, 0); - - bool t = (imageLoad(pageTbl, x+ivec3(0,0,0)).r>0) && - (imageLoad(pageTbl, x+ivec3(1,0,0)).r>0) && - (imageLoad(pageTbl, x+ivec3(0,1,0)).r>0) && - (imageLoad(pageTbl, x+ivec3(1,1,0)).r>0); - - if(t) - ;//imageStore(pageTbl, at, uvec4(0)); - } - } -#endif - - uint pageS = 0; - if(all(lessThan(at, size))) - pageS = imageLoad(pageTbl, at).r; - pageVal[id.x][id.y] = pageS; - barrier(); - - // only 2x2 for now - if(id.x%2==0 && id.y%2==0) { - uint pageL = pageVal[id.x+1][id.y+0]; - uint pageR = pageVal[id.x+0][id.y+1]; - uint pageD = pageVal[id.x+1][id.y+1]; - - if(pageS==1 && pageL==1 && pageR==1 && pageD==1) { - pageS = 2; - pageVal[id.x+0][id.y+0] = 2; - pageVal[id.x+1][id.y+0] = 0; - pageVal[id.x+0][id.y+1] = 0; - pageVal[id.x+1][id.y+1] = 0; - } - } - barrier(); - - if(pageS>1) { - imageStore(pageTbl, at+ivec3(0,0,0), uvec4(pageS,0,0,0)); - imageStore(pageTbl, at+ivec3(1,0,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(0,1,0), uvec4(0)); - imageStore(pageTbl, at+ivec3(1,1,0), uvec4(0)); - } - } -*/ diff --git a/shader/virtual_shadow/vsm_cluster_task.comp b/shader/virtual_shadow/vsm_cluster_task.comp index b7aabe59..cc2b2c7f 100644 --- a/shader/virtual_shadow/vsm_cluster_task.comp +++ b/shader/virtual_shadow/vsm_cluster_task.comp @@ -56,7 +56,7 @@ bool projectSphere(const vec4 sphere, out vec4 aabb, out float depthMin) { const float R = sphere.w; const float smWidthInv = 1.0/(1024.0); // NOTE: from camera.cpp - const float zScale = 1.0/(5*5120.0); + const float zScale = 1.0/(10*5120.0); float Rz = R * zScale; if(c.z - Rz > 1 || c.z + Rz < 0) diff --git a/shader/virtual_shadow/vsm_common.glsl b/shader/virtual_shadow/vsm_common.glsl index 69d7b014..672242c3 100644 --- a/shader/virtual_shadow/vsm_common.glsl +++ b/shader/virtual_shadow/vsm_common.glsl @@ -1,10 +1,10 @@ #ifndef VSM_COMMON_GLSL #define VSM_COMMON_GLSL -const int VSM_MAX_PAGES = 1024; const int VSM_PAGE_SIZE = 128; -const int VSM_PAGE_TBL_SIZE = 32; // small for testing, 64 can be better -const int VSM_PAGE_PER_ROW = 32; +const int VSM_PAGE_TBL_SIZE = 32; // small for testing, 64 can be better +const int VSM_PAGE_PER_ROW = 4096/VSM_PAGE_SIZE; +const int VSM_MAX_PAGES = VSM_PAGE_PER_ROW * VSM_PAGE_PER_ROW; // 1024; const int VSM_CLIPMAP_SIZE = VSM_PAGE_SIZE * VSM_PAGE_TBL_SIZE; struct VsmHeader { diff --git a/shader/virtual_shadow/vsm_rendering.comp b/shader/virtual_shadow/vsm_rendering.comp index d63fa28d..bd5e766a 100644 --- a/shader/virtual_shadow/vsm_rendering.comp +++ b/shader/virtual_shadow/vsm_rendering.comp @@ -1,6 +1,6 @@ #version 450 -layout(local_size_x = 8, local_size_y = 8) in; +layout(local_size_x = 16, local_size_y = 16) in; #extension GL_GOOGLE_include_directive : enable #extension GL_ARB_separate_shader_objects : enable @@ -9,17 +9,18 @@ layout(local_size_x = 8, local_size_y = 8) in; // bindless #extension GL_EXT_nonuniform_qualifier : enable +#include "virtual_shadow/vsm_common.glsl" #include "common.glsl" #include "scene.glsl" -const uint UINT_MAX = 0xFFFFFFFF; -const int PAGE_SIZE = 128; -const int PAGE_TBL_SIZE = 128; -const int CLIPMAP_SIZE = PAGE_SIZE * PAGE_TBL_SIZE; -const uint GroupSize = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z; +const uint NumThreads = gl_WorkGroupSize.x*gl_WorkGroupSize.y*gl_WorkGroupSize.z; +const uint NumTilesX = VSM_PAGE_SIZE/gl_WorkGroupSize.x; +const uint NumTilesY = VSM_PAGE_SIZE/gl_WorkGroupSize.y; +const uint NumTiles = NumTilesX * NumTilesY; const uint MaxVert = 64; const uint MaxPrim = 64; const uint MaxInd = (MaxPrim*3); +const uint MaxMesh = 1;//NumThreads / 64; const vec3 debugColors[] = { vec3(1,1,1), @@ -39,6 +40,14 @@ struct Vertex { vec2 uv; }; +struct Instance { + mat4x3 mat; + float fatness; + uint animPtr; + uint padd0; + uint padd1; + }; + struct Cluster { vec4 sphere; uint bucketId_commandId; @@ -48,70 +57,26 @@ struct Cluster { }; layout(push_constant, std430) uniform UboPush { - uint firstMeshlet; - int meshletCount; - float znear; + int meshletCount; } push; -layout(binding = 0, std140) uniform UboScene { - SceneDesc scene; - }; -layout(binding = 1, std430) readonly buffer CB0 { - uvec4 header; - uint pageList[]; - }; -layout(binding = 2, rgba8) uniform image2D pageData; - -layout(binding = 3, std430) readonly buffer Cbo { Cluster clusters[]; }; -layout(binding = 4, std430) readonly buffer Ibo { uint indexes []; } ibo[]; -layout(binding = 5, std430) readonly buffer Vbo { float vertices[]; } vbo[]; -layout(binding = 6) uniform texture2D textureMain[]; -layout(binding = 7) uniform sampler samplerMain; - -// -uvec2 pageImageAt = uvec2(0,0); -vec4 fragData = vec4(0.1, 0.4, 0.4, 0.0); - -// -shared uint clustersIterator; -shared uint clusterVisBits[(GroupSize+31)/32]; -shared uint clusterCurrent; - -shared uint meshletQueue; - -shared Vertex meshVertices[MaxVert]; -shared uvec3 primitiveIndices[MaxPrim]; -shared uint maxVertices, maxPrimitives, meshBucket; - -// -uint hash(uvec3 gridPos) { - return (gridPos.x * 18397) + (gridPos.y * 20483) + (gridPos.z * 29303); - } - -float edgeFunction(const vec2 a, const vec2 b, const vec2 c) { - return (c.x - a.x) * (b.y - a.y) - (c.y - a.y) * (b.x - a.x); - } - -void clearPage(vec4 f) { - for(uint x=0; x=clusters.length()) - return false; - - const Cluster c = clusters[i]; - if(c.instanceId!=UINT_MAX) - return false; // only landscape for now - if(c.sphere.w<=0.f) - return false; // disabled or deleted - - vec4 pos = scene.viewVirtualShadow * vec4(c.sphere.xyz,1); - pos.xy /= (1 << mip); +Vertex pullVertexA(const uint instanceId, const uint meshletId, const uint bucketId, const uint laneId) { + const Instance obj = pullInstance(instanceId); + if(obj.animPtr==0) { + vec3 dpos = vec3(0); //TODO + Vertex ret = pullVertex(meshletId, bucketId, laneId); + ret.pos = obj.mat*vec4(ret.pos,1.0) + dpos; + ret.pos = vec3(0); //pos; + return ret; + } - if(pos.x < viewPort.x || viewPort.z < pos.x || - pos.y < viewPort.y || viewPort.w < pos.y) - return false; + nonuniformEXT uint bId = bucketId; + uint vboOffset = meshletId * MaxVert + laneId; - return true; + // skinned geometry + uint id = vboOffset * 23; + vec3 normal = vec3(vbo[bId].vertices[id + 0], vbo[bId].vertices[id + 1], vbo[bId].vertices[id + 2]); + vec2 uv = vec2(vbo[bId].vertices[id + 3], vbo[bId].vertices[id + 4]); + //uint color = floatBitsToUint(vbo[bId].vertices[id + 5]); + vec3 pos0 = vec3(vbo[bId].vertices[id + 6], vbo[bId].vertices[id + 7], vbo[bId].vertices[id + 8]); + vec3 pos1 = vec3(vbo[bId].vertices[id + 9], vbo[bId].vertices[id + 10], vbo[bId].vertices[id + 11]); + vec3 pos2 = vec3(vbo[bId].vertices[id + 12], vbo[bId].vertices[id + 13], vbo[bId].vertices[id + 14]); + vec3 pos3 = vec3(vbo[bId].vertices[id + 15], vbo[bId].vertices[id + 16], vbo[bId].vertices[id + 17]); + uvec4 boneId = uvec4(unpackUnorm4x8(floatBitsToUint(vbo[bId].vertices[id + 18]))*255.0) + uvec4(obj.animPtr); + vec4 weight = vec4(vbo[bId].vertices[id + 19], vbo[bId].vertices[id + 20], vbo[bId].vertices[id + 21], vbo[bId].vertices[id + 22]); + + normal = obj.mat*vec4(normal,0); + + // Position + vec3 pos = vec3(0); + vec3 dpos = normal*obj.fatness; + { + const vec3 t0 = (pullMatrix(boneId.x)*vec4(pos0,1.0)).xyz; + const vec3 t1 = (pullMatrix(boneId.y)*vec4(pos1,1.0)).xyz; + const vec3 t2 = (pullMatrix(boneId.z)*vec4(pos2,1.0)).xyz; + const vec3 t3 = (pullMatrix(boneId.w)*vec4(pos3,1.0)).xyz; + pos = (t0*weight.x + t1*weight.y + t2*weight.z + t3*weight.w) + dpos; } -void mainMesh(uint meshletId, uint bucketId, uint mip) { - const uvec2 mesh = pullMeshlet(meshletId, bucketId); - const uint vertCount = mesh.x; - const uint primCount = mesh.y; - - const uint laneID = gl_LocalInvocationIndex; - - // Alloc outputs - setMeshOutputs(vertCount, primCount, bucketId); - if(laneID 1 || c.z + Rz < 0) + return false; - [[unroll]] - for(uint i=0; i> 16, mip); - barrier(); // make sure data is ready for raster - return true; + numMeshlets = min(numMeshlets, meshletList.length()); + barrier(); } -void processRenderingTile(const uint pageId, const uint mip, uvec2 pix) { - nonuniformEXT uint bucket = meshBucket; - const vec2 uv = vec2(pix)/PAGE_SIZE; - //fragData = texture(sampler2D(textureMain[bucket], samplerMain), uv); - - const uvec4 pageDesc = unpackUint4x8(pageList[pageId]); - const vec2 sp = (vec2(pageDesc.xy * PAGE_SIZE + pix + 0.5)/vec2(PAGE_SIZE * PAGE_TBL_SIZE))*2.0-1.0; - for(uint i=0; i<1 && i> 16, gl_LocalInvocationIndex); + barrier(); + } } void main() { - meshletQueue = -1; - clustersIterator = 0; - barrier(); + const uint pageId = gl_WorkGroupID.x; + if(pageId >= vsm.header.pageCount) + return; // uniform - const uint pageId = gl_WorkGroupID.x; - const uvec4 pageDesc = unpackUint4x8(pageList[pageId]); - const uint mip = pageDesc.z; - const vec4 viewPort = ((vec4(pageDesc.xy, pageDesc.xy+1)/float(PAGE_TBL_SIZE))*2.0-1.0); - pageImageAt = uvec2(pageId%32, pageId/32)*PAGE_SIZE; + numMeshlets = 0; + barrier(); - clearPage(); + const ivec2 pageCoord = ivec2(pageId%32, pageId/32)*VSM_PAGE_SIZE; - int count = 0; - while(processClusters(viewPort, mip)) { - while(processMeshlets(mip)) { - processRendering(viewPort, pageId, mip); - } - } + gatherMeshlets(vsm.pageList[pageId]); + processMeshlets(); - // clearPage(unpackUnorm4x8(count)); //debug - processRendering(viewPort, pageId, mip); + const uint tileId = 0; + const ivec2 fragCoord = ivec2(tileId%NumTilesX, tileId/NumTilesX)*ivec2(gl_WorkGroupSize.xy) + ivec2(gl_LocalInvocationID.xy); + imageStore(outputImage, pageCoord+fragCoord, uvec4(numMeshlets)); } diff --git a/shader/virtual_shadow/vsm_sort_pages.comp b/shader/virtual_shadow/vsm_sort_pages.comp index 8444e554..663f70e1 100644 --- a/shader/virtual_shadow/vsm_sort_pages.comp +++ b/shader/virtual_shadow/vsm_sort_pages.comp @@ -66,46 +66,48 @@ void sizeSort() { atomicAdd(pageCount1x1, 1); barrier(); - const uint offset4x4 = ((pageCount4x4+ 7)/ 8)*4*32; - const uint offset2x2 = ((pageCount2x2+15)/16)*2*32; + const uint PAGE_2x2 = VSM_PAGE_PER_ROW/2; + const uint PAGE_4x4 = VSM_PAGE_PER_ROW/4; + const uint offset4x4 = ((pageCount4x4+PAGE_4x4-1)/PAGE_4x4)*4*VSM_PAGE_PER_ROW; + const uint offset2x2 = ((pageCount2x2+PAGE_2x2-1)/PAGE_2x2)*2*VSM_PAGE_PER_ROW; if(s.x==4) { uint lid = (id - pageCount2x2 - pageCount1x1); - uint row = (lid/8), column = (lid%8); - uint nId = row*32*4 + column*4; + uint row = (lid/PAGE_4x4), column = (lid%PAGE_4x4); + uint nId = row*VSM_PAGE_PER_ROW*4 + column*4; pageList[nId+0] = v; pageList[nId+1] = packVsmPageInfo(inf+ivec3(1,0,0), ivec2(0)); pageList[nId+2] = packVsmPageInfo(inf+ivec3(2,0,0), ivec2(0)); pageList[nId+3] = packVsmPageInfo(inf+ivec3(3,0,0), ivec2(0)); - nId+=32; + nId += VSM_PAGE_PER_ROW; pageList[nId+0] = packVsmPageInfo(inf+ivec3(0,1,0), ivec2(0)); pageList[nId+1] = packVsmPageInfo(inf+ivec3(1,1,0), ivec2(0)); pageList[nId+2] = packVsmPageInfo(inf+ivec3(2,1,0), ivec2(0)); pageList[nId+3] = packVsmPageInfo(inf+ivec3(3,1,0), ivec2(0)); - nId+=32; + nId += VSM_PAGE_PER_ROW; pageList[nId+0] = packVsmPageInfo(inf+ivec3(0,2,0), ivec2(0)); pageList[nId+1] = packVsmPageInfo(inf+ivec3(1,2,0), ivec2(0)); pageList[nId+2] = packVsmPageInfo(inf+ivec3(2,2,0), ivec2(0)); pageList[nId+3] = packVsmPageInfo(inf+ivec3(3,2,0), ivec2(0)); - nId+=32; + nId += VSM_PAGE_PER_ROW; pageList[nId+0] = packVsmPageInfo(inf+ivec3(0,3,0), ivec2(0)); pageList[nId+1] = packVsmPageInfo(inf+ivec3(1,3,0), ivec2(0)); pageList[nId+2] = packVsmPageInfo(inf+ivec3(2,3,0), ivec2(0)); pageList[nId+3] = packVsmPageInfo(inf+ivec3(3,3,0), ivec2(0)); - nId+=32; + nId += VSM_PAGE_PER_ROW; } barrier(); if(s.x==2) { uint lid = (id-pageCount1x1); - uint row = (lid/16), column = (lid%16); - uint nId = row*32*2 + column*2 + offset4x4; + uint row = (lid/PAGE_2x2), column = (lid%PAGE_2x2); + uint nId = row*VSM_PAGE_PER_ROW*2 + column*2 + offset4x4; pageList[nId+0] = v; // pageList[nId+0] = packVsmPageInfo(inf+ivec3(0,0,0), s); pageList[nId+1] = packVsmPageInfo(inf+ivec3(1,0,0), ivec2(0)); - nId+=32; + nId += VSM_PAGE_PER_ROW; pageList[nId+0] = packVsmPageInfo(inf+ivec3(0,1,0), ivec2(0)); pageList[nId+1] = packVsmPageInfo(inf+ivec3(1,1,0), ivec2(0)); }