diff --git a/INSTRUCTION.md b/INSTRUCTION.md index 09acff1..0798a80 100644 --- a/INSTRUCTION.md +++ b/INSTRUCTION.md @@ -68,38 +68,55 @@ For editing the project, you will want to use [Visual Studio Code](https://code. WebGPU errors will appear in your browser's developer console (Ctrl + Shift + J for Chrome on Windows). Unlike some other graphics APIs, WebGPU error messages are often very helpful, especially if you've labeled your various pipeline components with meaningful names. Be sure to check the console whenever something isn't working correctly. ### Part 1: Understanding 3D Gaussian Point Cloud & Add MVP calculation - -To start off, read over the [paper](https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/) for some basic ideas. Although we don't focus on training part of the algorithm, it still knowledge you can learn for your good. Then read over point cloud renderer, add MVP calculation to the vertex shader. After that, you can see point cloud being rendered to screen. +- Read over the [3D Gaussian Splatting Paper](https://repo-sam.inria.fr/fungraph/3d-gaussian-splatting/) to get a basic understanding of the technique. +- Then read over the `point_cloud` renderer and add the MVP calculation to the vertex shader. After that, you should see a yellow point cloud rendered to the screen. ### Part 2: Gaussian Renderer -Pipeline: - +#### Gaussian Renderer Implementation: - Loading 3D gaussian data into GPU (this part is done for you, see `PointCloud` in load.ts) - Preprocess 3D gaussian data - - Do a simple view frustum culling to keep only visible gaussians (you may want to keep the bounding box to be slightly larger than actual frustum) - - Compute 3D covariance based on rotation and scale, also user input gaussian multipler. 
(see [post](https://github.com/kwea123/gaussian_splatting_notes) on 1.1 section) - - Compute 2D conic, maximum radius, and maximum quad size in NDC (see [post](https://github.com/kwea123/gaussian_splatting_notes) on 1.1 section) - - Store essential 2D gaussian data to later rasteriation pipeline + - Implement view frustum culling to remove non-visible Gaussians (make the bounding box slightly larger than the actual frustum so that gaussians on its edge are kept) + - Compute the 3D covariance from rotation and scale, together with the user-supplied gaussian multiplier. (see [post](https://github.com/kwea123/gaussian_splatting_notes) on 1.1 section) + - Compute 2D conic, maximum radius, and maximum quad size in NDC (see [post](https://github.com/kwea123/gaussian_splatting_notes) on 1.1 section) + - Use the spherical harmonics coefficients to evaluate the color of the gaussian from a particular view direction (an evaluation function is provided, see [post](https://beatthezombie.github.io/sh_post_1/)). + - Store the essential 2D gaussian data for the later rasterization pipeline - Add key_size, indices, and depth to sorter. - Sort Gaussians based on depth - Render the 2D splat on quad utlizing indirect draw call (instance count from process step) in sorted order. + - vertex shader: reconstruct the 2D quad vertices (NDC) from the splat data, and send the conic and color information to the fragment shader + - fragment shader: use the conic matrix ([see "Centered matrix equation"](https://en.wikipedia.org/wiki/Matrix_representation_of_conic_sections)) to determine whether a point is inside the splat. The opacity should decay exponentially with distance from the center. + +#### Hints: +- useful shader functions: + - `unpack2x16float`: all gaussian data is packed in f16, so you need to unpack it in the shader. + - `pack2x16float`: pack your 2D gaussian data in f16 format + - `atomicAdd` : store indices in sorting buffer using thread-safe updates in compute shaders +- Setting up pipeline: + - `device.queue.writeBuffer`: Remember to clear the sort infos each frame. 
+ - `encoder.copyBufferToBuffer`: copies data from one GPU buffer to another GPU buffer + - `blend`: use a blending function similar to the one used when rendering a semi-transparent texture to the screen. + - `depth`: as with semi-transparent textures, the gaussians should be rendered back to front. -Note: original paper do tile-based depth sorting for each tile and add on opacity till opacity is near 1.0, here we utiized the standard rasterization pipeline, so we render all of them all together. +### Part 2: Extra Credit: -### Part 2: Extra Credit: Optimization +#### Optimization: tile-based depth sorting Follow the [paper](https://github.com/kwea123/gaussian_splatting_notes) implementation using tile-based depth sorting. Then composite the final image using compute shader. ![Gaussian with Tile](./images/sorting2.webp) ![Gaussian with Tile](./images/sorting1.webp) +#### Optimization: half-precision floating point calculation + +See the [WebGPU supported f16 functions](https://webgpufundamentals.org/webgpu/lessons/webgpu-wgsl-function-reference.html), then implement your compressed f16 compute shader for the preprocess step. + ## Performance Analysis ## Base Code Walkthrough -In general, you can search for comments containing "Task" to see the most important/useful parts of the base code. +In general, you can search for comments containing "TODO" to see the most important/useful parts of the base code. 
## README diff --git a/src/renderers/gaussian-renderer.ts b/src/renderers/gaussian-renderer.ts index 415d83d..1684523 100644 --- a/src/renderers/gaussian-renderer.ts +++ b/src/renderers/gaussian-renderer.ts @@ -1,17 +1,13 @@ import { PointCloud } from '../utils/load'; import preprocessWGSL from '../shaders/preprocess.wgsl'; import renderWGSL from '../shaders/gaussian.wgsl'; -import { get_sorter,c_histogram_block_rows } from '../sort/sort'; +import { get_sorter,c_histogram_block_rows,C } from '../sort/sort'; import { Renderer } from './renderer'; export interface GaussianRenderer extends Renderer { } -const c_size_render_settings_buffer = Uint32Array.BYTES_PER_ELEMENT; -const c_workgroup_size_preprocess = 256; -const c_size_2d_splat = 24; - // Utility to create GPU buffers const createBuffer = ( device: GPUDevice, @@ -50,7 +46,7 @@ export default function get_renderer( module: device.createShaderModule({ code: preprocessWGSL }), entryPoint: 'preprocess', constants: { - workgroupSize: c_workgroup_size_preprocess, + workgroupSize: C.histogram_wg_size, sortKeyPerThread: c_histogram_block_rows, }, }, @@ -67,9 +63,6 @@ export default function get_renderer( ], }); - const preprocess_workgroup_count = Math.ceil( - pc.num_points / c_workgroup_size_preprocess - ); // =============================================== // Create Render Pipeline and Bind Groups @@ -79,23 +72,13 @@ export default function get_renderer( // =============================================== // Command Encoder Functions // =============================================== - const preprocess = (encoder: GPUCommandEncoder) => { - device.queue.writeBuffer(sorter.sort_info_buffer, 0, nulling_data); - device.queue.writeBuffer(sorter.sort_dispatch_indirect_buffer, 0, nulling_data); - - const pass = encoder.beginComputePass({ label: 'preprocess' }); - pass.setPipeline(preprocess_pipeline); - pass.setBindGroup(2, sort_bind_group); - pass.dispatchWorkgroups(preprocess_workgroup_count); - pass.end(); - }; + // 
=============================================== // Return Render Object // =============================================== return { frame: (encoder: GPUCommandEncoder, texture_view: GPUTextureView) => { - preprocess(encoder); sorter.sort(encoder); }, camera_buffer, diff --git a/src/shaders/gaussian.wgsl b/src/shaders/gaussian.wgsl index f6865e7..759226d 100644 --- a/src/shaders/gaussian.wgsl +++ b/src/shaders/gaussian.wgsl @@ -1,11 +1,16 @@ struct VertexOutput { @builtin(position) position: vec4, + //TODO: information passed from vertex shader to fragment shader }; +struct Splat { + //TODO: information defined in preprocess compute shader +}; @vertex fn vs_main( ) -> VertexOutput { + //TODO: reconstruct 2D quad based on information from splat, pass var out: VertexOutput; out.position = vec4(1. ,1. , 0., 1.); return out; diff --git a/src/shaders/point_cloud.wgsl b/src/shaders/point_cloud.wgsl index afe2bd3..01dded1 100644 --- a/src/shaders/point_cloud.wgsl +++ b/src/shaders/point_cloud.wgsl @@ -34,7 +34,7 @@ fn vs_main( let b = unpack2x16float(vertex.pos_opacity[1]); let pos = vec4(a.x, a.y, b.x, 1.); - // Task 0: MVP calculations + // TODO: MVP calculations out.position = pos; return out; diff --git a/src/shaders/preprocess.wgsl b/src/shaders/preprocess.wgsl index 2fad013..bbc63f5 100644 --- a/src/shaders/preprocess.wgsl +++ b/src/shaders/preprocess.wgsl @@ -28,12 +28,38 @@ struct DispatchIndirect { struct SortInfos { keys_size: atomic, // instance_count in DrawIndirect + //data below is for info inside radix sort padded_size: u32, passes: u32, even_pass: u32, odd_pass: u32, } +struct CameraUniforms { + view: mat4x4, + view_inv: mat4x4, + proj: mat4x4, + proj_inv: mat4x4, + viewport: vec2, + focal: vec2 +}; + +struct RenderSettings { + gaussian_scaling: f32, + sh_deg: f32, +} + +struct Gaussian { + pos_opacity: array, + rot: array, + scale: array +}; + +struct Splat { + //TODO: store information for 2D splat rendering +}; + +//TODO: bind your data here @group(2) 
@binding(0) var sort_infos: SortInfos; @group(2) @binding(1) @@ -43,8 +69,50 @@ var sort_indices : array; @group(2) @binding(3) var sort_dispatch: DispatchIndirect; +/// reads the ith sh coef from the storage buffer +fn sh_coef(splat_idx: u32, c_idx: u32) -> vec3 { + //TODO: access your binded sh_coeff, see load.ts for how it is stored + return vec3(0.0); +} + +// spherical harmonics evaluation with Condon–Shortley phase +fn computeColorFromSH(dir: vec3, v_idx: u32, sh_deg: u32) -> vec3 { + var result = SH_C0 * sh_coef(v_idx, 0u); + + if sh_deg > 0u { + + let x = dir.x; + let y = dir.y; + let z = dir.z; + + result += - SH_C1 * y * sh_coef(v_idx, 1u) + SH_C1 * z * sh_coef(v_idx, 2u) - SH_C1 * x * sh_coef(v_idx, 3u); + + if sh_deg > 1u { + + let xx = dir.x * dir.x; + let yy = dir.y * dir.y; + let zz = dir.z * dir.z; + let xy = dir.x * dir.y; + let yz = dir.y * dir.z; + let xz = dir.x * dir.z; + + result += SH_C2[0] * xy * sh_coef(v_idx, 4u) + SH_C2[1] * yz * sh_coef(v_idx, 5u) + SH_C2[2] * (2.0 * zz - xx - yy) * sh_coef(v_idx, 6u) + SH_C2[3] * xz * sh_coef(v_idx, 7u) + SH_C2[4] * (xx - yy) * sh_coef(v_idx, 8u); + + if sh_deg > 2u { + result += SH_C3[0] * y * (3.0 * xx - yy) * sh_coef(v_idx, 9u) + SH_C3[1] * xy * z * sh_coef(v_idx, 10u) + SH_C3[2] * y * (4.0 * zz - xx - yy) * sh_coef(v_idx, 11u) + SH_C3[3] * z * (2.0 * zz - 3.0 * xx - 3.0 * yy) * sh_coef(v_idx, 12u) + SH_C3[4] * x * (4.0 * zz - xx - yy) * sh_coef(v_idx, 13u) + SH_C3[5] * z * (xx - yy) * sh_coef(v_idx, 14u) + SH_C3[6] * x * (xx - 3.0 * yy) * sh_coef(v_idx, 15u); + } + } + } + result += 0.5; + + return max(vec3(0.), result); +} @compute @workgroup_size(workgroupSize,1,1) fn preprocess(@builtin(global_invocation_id) gid: vec3, @builtin(num_workgroups) wgs: vec3) { let idx = gid.x; + //TODO: set up pipeline as described in instruction + + let keys_per_dispatch = workgroupSize * sortKeyPerThread; + // increment DispatchIndirect.dispatchx each time you reach limit for one dispatch of keys } \ No newline at 
end of file diff --git a/src/sort/sort.ts b/src/sort/sort.ts index 0b81012..4bbeb23 100644 --- a/src/sort/sort.ts +++ b/src/sort/sort.ts @@ -45,7 +45,7 @@ function create_ping_pong_buffer(adjusted_count: number, keysize: number, device const c_radix_log2 = 8; export const c_histogram_block_rows = 15; -const C = { +export const C = { histogram_sg_size: 32, histogram_wg_size: 256, rs_radix_log2: 8, diff --git a/src/utils/load.ts b/src/utils/load.ts index 44ab731..42a8eba 100644 --- a/src/utils/load.ts +++ b/src/utils/load.ts @@ -37,9 +37,10 @@ export async function load(file: File, device: GPUDevice) { const nCoeffsPerColor = nRestCoeffs / 3; const sh_deg = Math.sqrt(nCoeffsPerColor + 1) - 1; const num_coefs = nShCoeffs(sh_deg); + const max_num_coefs = 16; const c_size_sh_coef = - 3 * num_coefs * c_size_float // 3 channels (RGB) x 16 coefs + 3 * max_num_coefs * c_size_float // 3 channels (RGB) x 16 coefs ; // figure out the order in which spherical harmonics should be read @@ -83,7 +84,7 @@ export async function load(file: File, device: GPUDevice) { readOffset = newReadOffset; const o = i * (c_size_3d_gaussian / c_size_float); - const output_offset = i * num_coefs * 3; + const output_offset = i * max_num_coefs * 3; for (let order = 0; order < num_coefs; ++order) { const order_offset = order * 3; @@ -113,6 +114,7 @@ export async function load(file: File, device: GPUDevice) { console.log("return result!"); return { num_points: num_points, + sh_deg: sh_deg, gaussian_3d_buffer, sh_buffer, };