Skip to content

Commit

Permalink
Joining the occult
Browse files Browse the repository at this point in the history
- Implement hi-z occlusion culling
- Generate depth pyramid just before issuing cull dispatches
- Currently use raw texel fetches but this may be causing loss
- Add _flw_cullData to frame uniforms
  • Loading branch information
Jozufozu committed Sep 4, 2024
1 parent 2537584 commit ec45287
Show file tree
Hide file tree
Showing 8 changed files with 251 additions and 6 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ public class IndirectPrograms extends AtomicReferenceCounted {
private static final ResourceLocation CULL_SHADER_MAIN = Flywheel.rl("internal/indirect/cull.glsl");
private static final ResourceLocation APPLY_SHADER_MAIN = Flywheel.rl("internal/indirect/apply.glsl");
private static final ResourceLocation SCATTER_SHADER_MAIN = Flywheel.rl("internal/indirect/scatter.glsl");
private static final ResourceLocation DEPTH_REDUCE_SHADER_MAIN = Flywheel.rl("internal/indirect/depth_reduce.glsl");

private static final Compile<InstanceType<?>> CULL = new Compile<>();
private static final Compile<ResourceLocation> UTIL = new Compile<>();
Expand All @@ -44,12 +45,14 @@ public class IndirectPrograms extends AtomicReferenceCounted {
private final Map<InstanceType<?>, GlProgram> culling;
private final GlProgram apply;
private final GlProgram scatter;
private final GlProgram depthReduce;

private IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply, GlProgram scatter) {
/**
 * Stores the already-compiled programs handed in by the caller.
 *
 * @param pipeline    programs keyed by pipeline program key
 * @param culling     culling programs keyed by instance type
 * @param apply       the apply utility program
 * @param scatter     the scatter utility program
 * @param depthReduce the depth reduce utility program, later exposed through {@code getDepthReduceProgram()}
 */
private IndirectPrograms(Map<PipelineProgramKey, GlProgram> pipeline, Map<InstanceType<?>, GlProgram> culling, GlProgram apply, GlProgram scatter, GlProgram depthReduce) {
this.pipeline = pipeline;
this.culling = culling;
this.apply = apply;
this.scatter = scatter;
this.depthReduce = depthReduce;
}

private static List<String> getExtensions(GlslVersion glslVersion) {
Expand Down Expand Up @@ -94,10 +97,10 @@ static void reload(ShaderSources sources, ImmutableList<PipelineProgramKey> pipe
try {
var pipelineResult = pipelineCompiler.compileAndReportErrors(pipelineKeys);
var cullingResult = cullingCompiler.compileAndReportErrors(createCullingKeys());
var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN));
var utils = utilCompiler.compileAndReportErrors(List.of(APPLY_SHADER_MAIN, SCATTER_SHADER_MAIN, DEPTH_REDUCE_SHADER_MAIN));

if (pipelineResult != null && cullingResult != null && utils != null) {
newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN));
newInstance = new IndirectPrograms(pipelineResult, cullingResult, utils.get(APPLY_SHADER_MAIN), utils.get(SCATTER_SHADER_MAIN), utils.get(DEPTH_REDUCE_SHADER_MAIN));
}
} catch (Throwable t) {
FlwPrograms.LOGGER.error("Failed to compile indirect programs", t);
Expand Down Expand Up @@ -184,6 +187,10 @@ public GlProgram getScatterProgram() {
return scatter;
}

/**
 * @return the depth reduce program this instance was constructed with
 */
public GlProgram getDepthReduceProgram() {
    return this.depthReduce;
}

@Override
protected void _delete() {
pipeline.values()
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,106 @@
package dev.engine_room.flywheel.backend.engine.indirect;

import org.lwjgl.opengl.GL32;
import org.lwjgl.opengl.GL46;

import com.mojang.blaze3d.platform.GlStateManager;

import dev.engine_room.flywheel.backend.gl.shader.GlProgram;
import dev.engine_room.flywheel.lib.math.MoreMath;
import net.minecraft.client.Minecraft;

/**
 * Owns an {@code R32F} mip-mapped texture holding a reduced copy of the main render target's
 * depth buffer, and rebuilds it on demand with the depth reduce compute program.
 *
 * <p>Level {@code i} of the pyramid is {@code max(1, size >> (i + 1))} texels along each axis, i.e.
 * level 0 is half the resolution of the depth buffer.
 */
public class DepthPyramid {
    private final GlProgram depthReduceProgram;

    /** GL name of the pyramid texture; read by callers that bind it for sampling. */
    public final int pyramidTextureId;

    // Dimensions the pyramid storage was last allocated for; -1 forces allocation on first use.
    private int lastWidth = -1;
    private int lastHeight = -1;

    /**
     * Creates the pyramid texture and configures its sampling state. No storage is allocated
     * until the first call to {@link #generate()}.
     *
     * @param depthReduceProgram compute program used to downsample depth one level at a time
     */
    public DepthPyramid(GlProgram depthReduceProgram) {
        this.depthReduceProgram = depthReduceProgram;

        pyramidTextureId = GL32.glGenTextures();

        GlStateManager._bindTexture(pyramidTextureId);
        // A mipmap-aware min filter is required for textureLod() to honour its lod argument.
        // With plain GL_NEAREST the sampler always reads the base level, whatever lod is requested.
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MIN_FILTER, GL32.GL_NEAREST_MIPMAP_NEAREST);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAG_FILTER, GL32.GL_NEAREST);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_COMPARE_MODE, GL32.GL_NONE);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_S, GL32.GL_CLAMP_TO_EDGE);
        GlStateManager._texParameter(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_WRAP_T, GL32.GL_CLAMP_TO_EDGE);
    }

    /**
     * Rebuilds every level of the pyramid from the main render target's current depth texture.
     *
     * <p>Leaves texture unit 1 active, the depth reduce program bound, and image unit 0 pointing at
     * the last pyramid level.
     */
    public void generate() {
        var mainRenderTarget = Minecraft.getInstance()
                .getMainRenderTarget();

        int width = mainRenderTarget.width;
        int height = mainRenderTarget.height;

        int mipLevels = getImageMipLevels(width, height);

        createPyramidMips(mipLevels, width, height);

        int depthBufferId = mainRenderTarget.getDepthTextureId();

        GlStateManager._bindTexture(depthBufferId);

        GL46.glMemoryBarrier(GL46.GL_FRAMEBUFFER_BARRIER_BIT);

        // The reduce shader samples its source from texture unit 1.
        GL46.glActiveTexture(GL32.GL_TEXTURE1);

        depthReduceProgram.bind();

        for (int i = 0; i < mipLevels; i++) {
            // Size of the level being written. This must match what createPyramidMips allocated,
            // otherwise the dispatch covers texels that do not exist in the destination image.
            int mipWidth = mipSize(width, i);
            int mipHeight = mipSize(height, i);

            // Level 0 is reduced from the real depth buffer, every other level from the level above it.
            int srcTexture = (i == 0) ? depthBufferId : pyramidTextureId;
            GL46.glBindTexture(GL32.GL_TEXTURE_2D, srcTexture);

            GL46.glBindImageTexture(0, pyramidTextureId, i, false, 0, GL32.GL_WRITE_ONLY, GL32.GL_R32F);

            depthReduceProgram.setUVec2("imageSize", mipWidth, mipHeight);
            // Source level to fetch from: 0 for the depth buffer, i - 1 within the pyramid.
            depthReduceProgram.setInt("lod", Math.max(0, i - 1));

            // The shader's local size is 8x8, so round the group count up.
            GL46.glDispatchCompute(MoreMath.ceilingDiv(mipWidth, 8), MoreMath.ceilingDiv(mipHeight, 8), 1);

            // Make this level's image stores visible to the texel fetches of the next pass.
            GL46.glMemoryBarrier(GL46.GL_TEXTURE_FETCH_BARRIER_BIT);
        }
    }

    /** Deletes the pyramid texture. */
    public void delete() {
        GL32.glDeleteTextures(pyramidTextureId);
    }

    /**
     * (Re)allocates storage for every pyramid level if the render target size changed since the
     * last allocation; otherwise does nothing.
     */
    private void createPyramidMips(int mipLevels, int width, int height) {
        if (lastWidth == width && lastHeight == height) {
            return;
        }

        lastWidth = width;
        lastHeight = height;

        GL32.glBindTexture(GL32.GL_TEXTURE_2D, pyramidTextureId);

        for (int i = 0; i < mipLevels; i++) {
            GL32.glTexImage2D(GL32.GL_TEXTURE_2D, i, GL32.GL_R32F, mipSize(width, i), mipSize(height, i), 0, GL32.GL_RED, GL32.GL_FLOAT, 0);
        }

        // Only the levels allocated above exist. Capping the max level keeps the texture
        // mipmap-complete even when the chain stops before reaching 1x1 (non-square targets).
        GL32.glTexParameteri(GL32.GL_TEXTURE_2D, GL32.GL_TEXTURE_MAX_LEVEL, mipLevels - 1);
    }

    /**
     * @param fullSize size of the depth buffer along one axis
     * @param level    pyramid level
     * @return the size of that pyramid level along the same axis, never less than 1
     */
    private static int mipSize(int fullSize, int level) {
        return Math.max(1, fullSize >> (level + 1));
    }

    /**
     * Counts pyramid levels: one, plus one for every time both dimensions can be halved while
     * both are still greater than 2.
     */
    private static int getImageMipLevels(int width, int height) {
        int result = 1;

        while (width > 2 && height > 2) {
            result++;
            width /= 2;
            height /= 2;
        }

        return result;
    }
}
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,8 @@
import java.util.List;
import java.util.Map;

import org.lwjgl.opengl.GL46;

import dev.engine_room.flywheel.api.backend.Engine;
import dev.engine_room.flywheel.api.instance.Instance;
import dev.engine_room.flywheel.api.instance.InstanceType;
Expand Down Expand Up @@ -46,6 +48,8 @@ public class IndirectDrawManager extends DrawManager<IndirectInstancer<?>> {
private final LightBuffers lightBuffers;
private final MatrixBuffer matrixBuffer;

private final DepthPyramid depthPyramid;

private boolean needsBarrier = false;

public IndirectDrawManager(IndirectPrograms programs) {
Expand All @@ -58,6 +62,8 @@ public IndirectDrawManager(IndirectPrograms programs) {
meshPool.bind(vertexArray);
lightBuffers = new LightBuffers();
matrixBuffer = new MatrixBuffer();

depthPyramid = new DepthPyramid(programs.getDepthReduceProgram());
}

@Override
Expand Down Expand Up @@ -136,13 +142,18 @@ public void flush(LightStorage lightStorage, EnvironmentStorage environmentStora

stagingBuffer.flush();

depthPyramid.generate();

// We could probably save some driver calls here when there are
// actually zero instances, but that feels like a very rare case

glMemoryBarrier(GL_SHADER_STORAGE_BARRIER_BIT);

matrixBuffer.bind();

GL46.glActiveTexture(GL46.GL_TEXTURE0);
GL46.glBindTexture(GL46.GL_TEXTURE_2D, depthPyramid.pyramidTextureId);

for (var group : cullingGroups.values()) {
group.dispatchCull();
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@
import net.minecraft.world.phys.Vec3;

public final class FrameUniforms extends UniformWriter {
private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 10;
// Total byte size of the frame uniform buffer.
// NOTE(review): relative to the previous value this adds 6 four-byte slots (4 * 10 -> 4 * 16), while
// writeCullData appends 7 four-byte values (6 floats + 1 int). Confirm the previous size already had a
// spare 4-byte slot, otherwise the final write lands past the end of the buffer.
private static final int SIZE = 96 + 64 * 9 + 16 * 5 + 8 * 2 + 8 + 4 * 16;
static final UniformBuffer BUFFER = new UniformBuffer(Uniforms.FRAME_INDEX, SIZE);

private static final Matrix4f VIEW = new Matrix4f();
Expand Down Expand Up @@ -112,6 +112,8 @@ public static void update(RenderContext context) {

ptr = writeInt(ptr, debugMode);

ptr = writeCullData(ptr);

firstWrite = false;
BUFFER.markDirty();
}
Expand Down Expand Up @@ -179,6 +181,18 @@ private static long writeCameraIn(long ptr, Camera camera) {
return writeInFluidAndBlock(ptr, level, blockPos, cameraPos);
}

/**
 * Appends the occlusion culling parameters at {@code ptr}: near plane, far plane, the two
 * projection scale terms, half the main render target's width and height, and the
 * {@code useMin} flag (always 0 here).
 *
 * @param ptr address to start writing at
 * @return the address just past the last value written
 */
private static long writeCullData(long ptr) {
    Minecraft minecraft = Minecraft.getInstance();

    int pyramidWidth = minecraft.getMainRenderTarget().width >> 1;
    int pyramidHeight = minecraft.getMainRenderTarget().height >> 1;

    ptr = writeFloat(ptr, 0.05F); // zNear
    ptr = writeFloat(ptr, minecraft.gameRenderer.getDepthFar()); // zFar
    ptr = writeFloat(ptr, PROJECTION.m00()); // P00
    ptr = writeFloat(ptr, PROJECTION.m11()); // P11
    ptr = writeFloat(ptr, pyramidWidth); // pyramidWidth
    ptr = writeFloat(ptr, pyramidHeight); // pyramidHeight

    return writeInt(ptr, 0); // useMin
}

/**
* Writes the frustum planes of the given projection matrix to the given buffer.<p>
* Uses a different format that is friendly towards an optimized instruction-parallel
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@
import static org.lwjgl.opengl.GL20.glUniformMatrix3fv;
import static org.lwjgl.opengl.GL20.glUniformMatrix4fv;
import static org.lwjgl.opengl.GL30.glUniform1ui;
import static org.lwjgl.opengl.GL30.glUniform2ui;
import static org.lwjgl.opengl.GL31.GL_INVALID_INDEX;
import static org.lwjgl.opengl.GL31.glGetUniformBlockIndex;
import static org.lwjgl.opengl.GL31.glUniformBlockBinding;
Expand Down Expand Up @@ -118,6 +119,16 @@ public void setUInt(String glslName, int value) {
glUniform1ui(uniform, value);
}

/**
 * Uploads a two-component unsigned integer uniform.
 * Does nothing when the looked-up location is negative.
 *
 * @param name uniform name passed to the location lookup
 * @param x    first component
 * @param y    second component
 */
public void setUVec2(String name, int x, int y) {
    int location = getUniformLocation(name);

    if (location >= 0) {
        glUniform2ui(location, x, y);
    }
}

public void setInt(String glslName, int value) {
int uniform = getUniformLocation(glslName);

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,12 @@ layout(std430, binding = _FLW_MODEL_BUFFER_BINDING) restrict buffer ModelBuffer
ModelDescriptor _flw_models[];
};

layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict buffer MatrixBuffer {
layout(std430, binding = _FLW_MATRIX_BUFFER_BINDING) restrict readonly buffer MatrixBuffer {
Matrices _flw_matrices[];
};

layout(binding = 0) uniform sampler2D _flw_depthPyramid;

// Disgustingly vectorized sphere frustum intersection taking advantage of ahead of time packing.
// Only uses 6 fmas and some boolean ops.
// See also:
Expand All @@ -40,6 +42,28 @@ bool _flw_testSphere(vec3 center, float radius) {
return all(xyInside) && all(zInside);
}

// Projects a view-space sphere onto the screen and writes its bounding rectangle, in UV space, to aabb.
//
// c      - sphere center in view space
// r      - sphere radius
// znear  - distance to the near plane
// P00    - x scale term of the projection matrix
// P11    - y scale term of the projection matrix
// aabb   - out: bounding rectangle; only written when the function returns true
//
// Returns false when the sphere is not entirely in front of the near plane, in which case no
// rectangle can be computed and the caller must not occlusion-cull the sphere.
bool projectSphere(vec3 c, float r, float znear, float P00, float P11, out vec4 aabb) {
    // View space looks down -Z here (the caller derives depth from znear / (center.z + radius), which
    // is only meaningful for negative z), so the point of the sphere nearest the camera is at c.z + r.
    // If that point is not beyond the near plane the sphere touches or crosses it: c.z * c.z - r * r
    // below may go negative and the projection is undefined, so bail out.
    if (c.z + r > -znear) {
        return false;
    }

    vec3 cr = c * r;
    float czr2 = c.z * c.z - r * r;

    float vx = sqrt(c.x * c.x + czr2);
    float minx = (vx * c.x - cr.z) / (vx * c.z + cr.x);
    float maxx = (vx * c.x + cr.z) / (vx * c.z - cr.x);

    float vy = sqrt(c.y * c.y + czr2);
    float miny = (vy * c.y - cr.z) / (vy * c.z + cr.y);
    float maxy = (vy * c.y + cr.z) / (vy * c.z - cr.y);

    aabb = vec4(minx * P00, miny * P11, maxx * P00, maxy * P11);
    aabb = aabb.xwzy * vec4(-0.5f, -0.5f, -0.5f, -0.5f) + vec4(0.5f); // clip space -> uv space

    return true;
}

bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
uint matrixIndex = _flw_models[modelIndex].matrixIndex;
BoundingSphere sphere = _flw_models[modelIndex].boundingSphere;
Expand All @@ -56,7 +80,38 @@ bool _flw_isVisible(uint instanceIndex, uint modelIndex) {
transformBoundingSphere(_flw_matrices[matrixIndex].pose, center, radius);
}

return _flw_testSphere(center, radius);
bool isVisible = _flw_testSphere(center, radius);

if (isVisible) {
transformBoundingSphere(flw_view, center, radius);

vec4 aabb;
if (projectSphere(center, radius, _flw_cullData.znear, _flw_cullData.P00, _flw_cullData.P11, aabb))
{
float width = (aabb.z - aabb.x) * _flw_cullData.pyramidWidth;
float height = (aabb.w - aabb.y) * _flw_cullData.pyramidHeight;

float level = floor(log2(max(width, height)));

float depth01 = textureLod(_flw_depthPyramid, aabb.xw, level).r;
float depth11 = textureLod(_flw_depthPyramid, aabb.zw, level).r;
float depth10 = textureLod(_flw_depthPyramid, aabb.zy, level).r;
float depth00 = textureLod(_flw_depthPyramid, aabb.xy, level).r;

float depth;
if (_flw_cullData.useMin == 0) {
depth = max(max(depth00, depth01), max(depth10, depth11));
} else {
depth = min(min(depth00, depth01), min(depth10, depth11));
}

float depthSphere = 1. + _flw_cullData.znear / (center.z + radius);

isVisible = isVisible && depthSphere <= depth;
}
}

return isVisible;
}

void main() {
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
// Reduces one level of a depth image into the next smaller one: every output texel is the max
// (or min, when useMin != 0) of a 2x2 block of input texels.
layout(local_size_x = 8, local_size_y = 8) in;

// Destination level, written with imageStore.
layout(binding = 0, r32f) uniform writeonly image2D outImage;
// Source texture, read with texel fetches at level `lod`.
layout(binding = 1) uniform sampler2D inImage;

// Upper bound on the texel coordinates this dispatch should write. Expected to be the size of the
// level bound to outImage; a larger value only makes the guard below less effective.
uniform uvec2 imageSize;
// Level of inImage to read from.
uniform int lod;

uniform int useMin = 0;

void main() {
    uvec2 pos = gl_GlobalInvocationID.xy;

    // The dispatch is rounded up to whole 8x8 groups, so some invocations fall outside the image.
    if (any(greaterThanEqual(pos, imageSize))) {
        return;
    }

    ivec2 samplePos = ivec2(pos) * 2;

    // Texel fetches outside the source level are undefined. That happens for the +1 taps whenever the
    // source level is only one texel wide or tall, so clamp to the last valid texel. Repeating an edge
    // texel does not change the result of a max or min.
    ivec2 maxCoord = textureSize(inImage, lod) - ivec2(1);

    float depth01 = texelFetch(inImage, min(samplePos + ivec2(0, 1), maxCoord), lod).r;
    float depth11 = texelFetch(inImage, min(samplePos + ivec2(1, 1), maxCoord), lod).r;
    float depth10 = texelFetch(inImage, min(samplePos + ivec2(1, 0), maxCoord), lod).r;
    float depth00 = texelFetch(inImage, min(samplePos, maxCoord), lod).r;

    float depth;
    if (useMin == 0) {
        depth = max(max(depth00, depth01), max(depth10, depth11));
    } else {
        depth = min(min(depth00, depth01), min(depth10, depth11));
    }

    imageStore(outImage, ivec2(pos), vec4(depth));
}
Loading

0 comments on commit ec45287

Please sign in to comment.