Skip to content

Commit

Permalink
shadertui live reloading demo initial implementation
Browse files Browse the repository at this point in the history
  • Loading branch information
austinvhuang committed Jul 11, 2024
1 parent f983bcf commit fb56cf0
Show file tree
Hide file tree
Showing 8 changed files with 304 additions and 9 deletions.
1 change: 1 addition & 0 deletions examples/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ In order of beginning to advanced:
| [gpu_puzzles](gpu_puzzles) | (WIP) Implementation of Sasha Rush's GPU puzzles. |
| [render](render) | GPU rendering of a signed distance function for a 3D sphere. |
| [physics](physics) | Parallel physics simulation of a double pendulum with each thread starting at a different initial condition. |
| [matmul](matmul) | Tiled matrix multiplication. |
| [webgpu_from_scratch](webgpu_from_scratch) | A minimal from-scratch example of how to use WebGPU directly without this library. This is useful to understand the code internals of gpu.cpp. Note this takes a while to build as it compiles the WebGPU C API implementation. |

Future examples that may be added (collaborations welcome):
Expand Down
4 changes: 2 additions & 2 deletions examples/matmul/run.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -354,10 +354,10 @@ void runTest(int version, size_t M, size_t K, size_t N,

// Report performance
auto duration =
std::chrono::duration_cast<std::chrono::milliseconds>(end - start);
std::chrono::duration_cast<std::chrono::seconds>(end - start);
float gflops = 2 * M * N *
K / // factor of 2 for multiplication & accumulation
(static_cast<float>(duration.count()) / 1000.0) /
(static_cast<float>(duration.count())) /
1000000000.0 * static_cast<float>(nIter);
LOG(kDefLog, kInfo,
"Execution Time: (M = %d, K = %d, N = %d) x %d iterations : %.1f "
Expand Down
22 changes: 22 additions & 0 deletions examples/shadertui/Makefile
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
# Build, run and live-reload the shadertui demo.
CXX=clang++
# Root of the gpu.cpp checkout; defaults to two directories above this one.
GPUCPP ?= $(PWD)/../..
LIBDIR ?= $(GPUCPP)/third_party/lib
# Shell snippet that sources $(GPUCPP)/source into the current shell before the
# binary is launched (presumably sets up the dynamic library search path --
# confirm against that script).
LIBSPEC ?= . $(GPUCPP)/source
# Job count handed to `make -j` by the watch target.
# NOTE(review): `nproc` may not exist on every platform -- confirm for macOS.
NUM_JOBS?=$(shell nproc)
TARGET=shadertui
# Compiler and linker flags. The source file run.cpp is listed here, so the
# build rule below passes no separate input file.
FLAGS=-stdlib=libc++ -std=c++17 -I$(GPUCPP) -I$(GPUCPP)/third_party/headers -L$(GPUCPP)/third_party/lib run.cpp -ldl -ldawn
# Command that lists the files watched for changes: regular files located
# directly inside this directory, ../../utils and ../../ (no recursion).
CODEPATH = find . ../../utils ../../ -maxdepth 1 -type f

# Build the binary if needed, then source the library environment and run it.
run: ./build/$(TARGET)
$(LIBSPEC) && ./build/$(TARGET)

# Use clang -v to see the include paths
build/$(TARGET): run.cpp
mkdir -p build && $(CXX) $(FLAGS) -o ./build/$(TARGET)

# Rebuild and rerun whenever one of the watched files changes. Requires the
# `entr` utility; the first recipe line aborts with install hints if it is
# missing.
watch:
@command -v entr >/dev/null 2>&1 || { echo >&2 "Please install entr with 'brew install entr' or 'sudo apt-get install entr'"; exit 1; }
mkdir -p build && $(CODEPATH) | entr -s "$(LIBSPEC) && rm -f ./build/$(TARGET) && make -j$(NUM_JOBS) run"

# Ask for confirmation (any input continues, CTRL-C aborts), then delete
# everything under build/.
clean:
read -r -p "This will delete the contents of build/*. Are you sure? [CTRL-C to abort] " response && rm -rf build/*
123 changes: 123 additions & 0 deletions examples/shadertui/run.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,123 @@
#include "gpu.h"
#include <array>
#include <cstdio>
#include <fstream>
#include <future>
#include <string>
#include <thread>

#include "utils/array_utils.h"
#include "utils/logging.h"

using namespace gpu;

/**
 * @brief Converts a grid of intensity values into ASCII-art text.
 *
 * Every value is mapped onto one glyph of a fixed brightness ramp, from ' '
 * (darkest) to '@' (brightest). Each output row is terminated by '\n'.
 *
 * @tparam rows Number of rows in the grid.
 * @tparam cols Number of columns in the grid.
 * @param values Row-major intensities, nominally in [0, 1]. Values at or
 * below zero and NaN map to the darkest glyph; values at or above one map to
 * the brightest glyph.
 * @param raster Output buffer of `rows` lines, each `cols` glyphs followed by
 * '\n'. The buffer is NOT null-terminated, so print it with an explicit
 * length.
 */
template <size_t rows, size_t cols>
void rasterize(const std::array<float, rows * cols> &values,
               std::array<char, rows *(cols + 1)> &raster) {
  static const char intensity[] = " .`'^-+=*x17X$8#%@";
  // sizeof counts the trailing '\0', so the last glyph sits at sizeof - 2.
  constexpr size_t kMaxIndex = sizeof(intensity) - 2;
  for (size_t i = 0; i < rows; ++i) {
    for (size_t j = 0; j < cols; ++j) {
      const float value = values[i * cols + j];
      // Clamp while still in floating point: converting a negative, NaN or
      // oversized float to size_t is undefined behaviour.
      size_t index = 0;
      if (value >= 1.0f) {
        index = kMaxIndex;
      } else if (value > 0.0f) {
        index = static_cast<size_t>(value * static_cast<float>(kMaxIndex));
      }
      raster[i * (cols + 1) + j] = intensity[index];
    }
    raster[i * (cols + 1) + cols] = '\n';
  }
}

/**
 * @brief Returns the time elapsed since `zeroTime`.
 *
 * NOTE: despite the function name, the result is expressed in SECONDS:
 * `std::chrono::duration<float>` uses the default period of one second.
 *
 * @param zeroTime Reference point the elapsed time is measured from.
 * @return Elapsed time since `zeroTime` as a float number of seconds.
 */
float getCurrentTimeInMilliseconds(
    std::chrono::time_point<std::chrono::high_resolution_clock> &zeroTime) {
  using Clock = std::chrono::high_resolution_clock;
  using FloatSeconds = std::chrono::duration<float>;
  const auto sinceZero = Clock::now() - zeroTime;
  return std::chrono::duration_cast<FloatSeconds>(sinceZero).count();
}

/**
 * @brief Reads the whole text file `filename` into `codeString`.
 *
 * Blocks until the file can be opened, retrying every 100 ms. The retry
 * presumably covers editors that briefly remove or replace the file while
 * saving -- confirm if a timeout is ever wanted.
 *
 * @param filename Path of the file to read.
 * @param codeString Output; any previous contents are discarded and replaced
 * by the file's text.
 */
void loadShaderCode(const std::string &filename, std::string &codeString) {
  codeString.clear();
  FILE *file = fopen(filename.c_str(), "r");
  // Do not fclose() here: `file` is null inside this loop, and passing a null
  // stream to fclose is undefined behaviour.
  while (file == nullptr) {
    std::this_thread::sleep_for(std::chrono::milliseconds(100));
    file = fopen(filename.c_str(), "r");
  }
  char buffer[4096];
  while (fgets(buffer, sizeof(buffer), file) != nullptr) {
    codeString += buffer;
  }
  fclose(file);
}

/**
 * @brief Terminal shader viewer with live reloading.
 *
 * Compiles shader.wgsl into a compute kernel, then loops forever: uploads the
 * current time, dispatches the kernel, copies the result back to the CPU,
 * converts it to ASCII art and prints it. When the text of shader.wgsl
 * changes on disk, the shader and kernel are rebuilt in place.
 *
 * The loop never exits on its own; stop the program with Ctrl-C.
 */
int main() {

  Context ctx = createContext();
  static constexpr size_t kRows = 40;
  static constexpr size_t kCols = 70;

  LOG(kDefLog, kInfo, "Creating screen tensor");

  std::array<float, kRows * kCols> screenArr;
  screenArr.fill(0.0f);
  Tensor screen = createTensor(ctx, {kRows, kCols}, kf32, screenArr.data());

  std::string codeString;
  // Uniform block handed to the shader; field order mirrors `Params` in
  // shader.wgsl.
  struct Params {
    float time;
    uint32_t screenWidth;
    uint32_t screenHeight;
  } params = {0.0, kCols, kRows};

  LOG(kDefLog, kInfo, "Loading shader code from shader.wgsl");
  loadShaderCode("shader.wgsl", codeString);

  LOG(kDefLog, kInfo, "Creating shader and kernel");
  ShaderCode shader = createShader(codeString.c_str(), Shape{16, 16, 1});
  Kernel renderKernel =
      createKernel(ctx, shader, Bindings{screen},
                   cdiv({kCols, kRows, 1}, shader.workgroupSize), params);

  LOG(kDefLog, kInfo, "Starting render loop");

  // One '\n' per row; rasterize() does not null-terminate this buffer.
  std::array<char, kRows *(kCols + 1)> raster;

  auto start = std::chrono::high_resolution_clock::now();
  // Must start at zero: it is read at the top of the first iteration, before
  // the end of the loop body assigns it.
  std::chrono::duration<float> elapsed{0.0f};
  // Number of times a changed shader file has been picked up.
  size_t ticks = 0;
  while (true) {
    // The shader file is re-read only while the elapsed seconds exceed the
    // reload count by more than one.
    if (elapsed.count() - static_cast<float>(ticks) > 1.0) {
      loadShaderCode("shader.wgsl", codeString);
      if (codeString != shader.data) {
        shader = createShader(codeString.c_str(), Shape{16, 16, 1});
        renderKernel =
            createKernel(ctx, shader, Bindings{screen},
                         cdiv({kCols, kRows, 1}, shader.workgroupSize), params);
        ticks++;
      }
    }

    // Upload the current time (in seconds, see getCurrentTimeInMilliseconds)
    // into the kernel's last buffer, which holds the params.
    params.time = getCurrentTimeInMilliseconds(start);
    wgpuQueueWriteBuffer(ctx.queue,
                         renderKernel.buffers[renderKernel.numBindings - 1], 0,
                         static_cast<void *>(&params), sizeof(params));

    auto frameStart = std::chrono::high_resolution_clock::now();
    // A fresh promise/future pair is needed for every dispatch.
    std::promise<void> promise;
    std::future<void> future = promise.get_future();
    dispatchKernel(ctx, renderKernel, promise);
    wait(ctx, future);
    resetCommandBuffer(ctx.device, renderKernel);
    toCPU(ctx, screen, screenArr);
    rasterize<kRows, kCols>(screenArr, raster);
    auto frameEnd = std::chrono::high_resolution_clock::now();

    std::chrono::duration<float> frameElapsed = frameEnd - frameStart;
    elapsed = frameEnd - start;
    // Pad the frame to roughly 20 ms; a non-positive duration returns at once.
    std::this_thread::sleep_for(std::chrono::milliseconds(20) - frameElapsed);
    // Home the cursor and clear the screen, then draw the frame. The raster
    // has no null terminator, so its length is passed explicitly via "%.*s".
    printf("\033[H\033[J%.*s\nReloaded file %zu times\n",
           static_cast<int>(raster.size()), raster.data(), ticks);
  }

  // Unreachable: the render loop above never terminates.
  LOG(kDefLog, kInfo, "Done");
}
27 changes: 27 additions & 0 deletions examples/shadertui/shader.wgsl
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Output intensity buffer: one f32 per character cell, indexed row-major as
// y * screenwidth + x.
@group(0) @binding(0) var<storage, read_write> out: array<f32>;
// Parameters supplied by the host program.
@group(0) @binding(1) var<uniform> params: Params;

struct Params {
  time: f32,          // elapsed time supplied by the host
  screenwidth: u32,   // number of columns
  screenheight: u32,  // number of rows
};

// Signed distance from point `p` to a circle of radius `r` centred at `c`:
// negative inside the circle, zero on its edge, positive outside.
fn sdf(p: vec2<f32>, c: vec2<f32>, r: f32) -> f32 {
  return length(p - c) - r;
}

// Computes the intensity of one cell per invocation: two animated circle
// outlines plus a vertical sine gradient.
@compute @workgroup_size(16, 16, 1)
fn main(@builtin(global_invocation_id) globalID : vec3<u32>) {
  // The dispatch is rounded up to whole 16x16 workgroups, so some invocations
  // fall outside the screen. Without this guard an invocation with
  // x >= screenwidth would overwrite cells belonging to the next row.
  if (globalID.x >= params.screenwidth || globalID.y >= params.screenheight) {
    return;
  }
  // Normalised cell coordinates in [0, 1).
  let xy: vec2<f32> =
    vec2<f32>(f32(globalID.x) / f32(params.screenwidth),
              f32(globalID.y) / f32(params.screenheight));
  // Divide by a larger value to slow the animation down.
  let t: f32 = params.time / 1.0;
  let idx = globalID.y * params.screenwidth + globalID.x;
  let center = vec2<f32>(0.5, 0.5 + 0.3 * sin(3.0 * t));
  let center2 = vec2<f32>(0.5 + 0.2 * cos(3.0 * t), 0.5);
  // out[idx] += 0.4 - min(5 * abs(sdf(xy, center, 0.2)), 0.4) + 0.5 * cos(xy.y + t) + 0.5 * sin(xy.x);
  // Accumulate locally and store once instead of repeated read-modify-write
  // on the storage buffer.
  var value: f32 = 0.3 - min(5 * abs(sdf(xy, center, 0.2)), 0.3);
  value += 0.3 - min(5 * abs(sdf(xy, center2, 0.2)), 0.3);
  value += 0.4 * sin(xy.y + t);
  out[idx] = value;
}
14 changes: 7 additions & 7 deletions gpu.h
Original file line number Diff line number Diff line change
Expand Up @@ -171,10 +171,10 @@ inline std::string toString(const Shape &shape) {
inline std::string toString(size_t value) { return std::to_string(value); }

/**
* @brief Represents a shader code.
* workgroup size and precision are stored since they are specified in the
* shader code and making the values available helps keep parameters
* consistent.
* @brief Represents shader code. Wrapper type around the code string with
* additional metadata for workgroup size and precision since they are
* specified in the shader code. Additionally, label and entryPoint are used by
* `createKernel()` to specify the label and entry point of the shader.
*/
struct ShaderCode {
inline ShaderCode(const std::string &data = "", size_t workgroupSize = 256,
Expand Down Expand Up @@ -694,7 +694,7 @@ inline void toCPU(Context &ctx, Tensor &tensor, float *data,
* @example toCPU(ctx, tensor, data);
*/
template <size_t N>
void toCPU(Context &ctx, Tensor &tensor, std::array<float, N> data) {
void toCPU(Context &ctx, Tensor &tensor, std::array<float, N>& data) {
toCPU(ctx, tensor, data.data(), sizeof(data));
}

Expand Down Expand Up @@ -922,8 +922,8 @@ inline Kernel createKernel(Context &ctx, const ShaderCode &shader,
computePipelineDesc.layout = pipelineLayout;
computePipelineDesc.compute.module =
wgpuDeviceCreateShaderModule(device, &shaderModuleDesc);
computePipelineDesc.compute.entryPoint = "main";
computePipelineDesc.label = "compute pipeline";
computePipelineDesc.compute.entryPoint = shader.entryPoint.c_str();
computePipelineDesc.label = shader.label.c_str();
op.computePipeline =
wgpuDeviceCreateComputePipeline(device, &computePipelineDesc);
}
Expand Down
Loading

0 comments on commit fb56cf0

Please sign in to comment.