diff --git a/experimental/fasthtml/Makefile b/experimental/fasthtml/Makefile
new file mode 100644
index 0000000..887db50
--- /dev/null
+++ b/experimental/fasthtml/Makefile
@@ -0,0 +1,32 @@
+# Builds the gpu.cpp Emscripten demo (run.cpp) and serves it through run.py.
+GPUCPP=../..
+FLAGS=-std=c++17 -s USE_WEBGPU=1 -s ASYNCIFY=1 -I$(GPUCPP)
+
+.PHONY: default server clean check-emsdk
+
+default: server
+
+# check-emsdk is phony, so it is listed as an order-only prerequisite (after
+# the '|'): it still runs first but no longer forces a rebuild on every make.
+build/run.html: run.cpp custom_shell.html | check-emsdk
+	em++ run.cpp -o build/run.html \
+	$(FLAGS) --shell-file ./custom_shell.html
+
+# --shell-file only affects HTML output, so it is not passed for the .js target.
+build/run.js: run.cpp | check-emsdk
+	em++ run.cpp -o build/run.js \
+	$(FLAGS)
+
+build/run.wasm: run.cpp | check-emsdk
+	em++ run.cpp -o build/run.wasm \
+	$(FLAGS)
+
+# run.py's page loads /build/run.js; em++ writes build/run.wasm alongside it.
+server: build/run.js
+	python3 run.py
+
+clean:
+	rm -rf build/*
+
+check-emsdk:
+	@command -v em++ > /dev/null 2>&1 || (echo "emsdk not found. Please install emsdk and run 'source emsdk_env.sh' in the emsdk directory." && exit 1)
diff --git a/experimental/fasthtml/build/.gitkeep b/experimental/fasthtml/build/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/experimental/fasthtml/custom_shell.html b/experimental/fasthtml/custom_shell.html
new file mode 100644
index 0000000..516d9b9
--- /dev/null
+++ b/experimental/fasthtml/custom_shell.html
@@ -0,0 +1,53 @@
+
+
+
+
+ gpu.cpp
+
+
+
+
+
+
+
+
+
+
+
+ {{{ SCRIPT }}}
+
+
diff --git a/experimental/fasthtml/run.cpp b/experimental/fasthtml/run.cpp
new file mode 100644
index 0000000..641a85d
--- /dev/null
+++ b/experimental/fasthtml/run.cpp
@@ -0,0 +1,62 @@
+#include <array>
+#include <cstdio>
+#include <future>
+#include <memory>
+#include "gpu.h"
+
+// #include
+#include "emscripten/emscripten.h"
+
+using namespace gpu; // createContext, createTensor, createKernel,
+ // createShader, dispatchKernel, wait, toCPU
+ // Tensor, Kernel, Context, Shape, kf32
+
+// WGSL compute kernel: elementwise tanh-approximation GELU; passes x through when x > 10.
+static const char *kGelu = R"(
+const GELU_SCALING_FACTOR: f32 = 0.7978845608028654; // sqrt(2.0 / PI)
+@group(0) @binding(0) var<storage, read_write> inp: array<{{precision}}>;
+@group(0) @binding(1) var<storage, read_write> out: array<{{precision}}>;
+@compute @workgroup_size({{workgroupSize}})
+fn main(
+    @builtin(global_invocation_id) GlobalInvocationID: vec3<u32>) {
+    let i: u32 = GlobalInvocationID.x;
+    if (i < arrayLength(&inp)) {
+        let x: f32 = inp[i];
+        out[i] = select(0.5 * x * (1.0 + tanh(GELU_SCALING_FACTOR
+                 * (x + .044715 * x * x * x))), x, x > 10.0);
+    }
+}
+)";
+
+// Demo entry point: runs kGelu over N floats on the GPU and prints the first 12 results.
+int main(int argc, char **argv) {
+  printf("\033[2J\033[1;1H"); // ANSI escapes: clear screen, cursor to row 1 / col 1
+  printf("\nHello gpu.cpp!\n");
+  printf("--------------\n\n");
+
+  Context ctx = createContext({});
+  static constexpr size_t N = 5000;
+  std::array<float, N> inputArr, outputArr;
+  for (size_t i = 0; i < N; ++i) {
+    inputArr[i] = static_cast<float>(i) / 10.0f; // dummy input data
+  }
+  Tensor input = createTensor(ctx, Shape{N}, kf32, inputArr.data());
+  Tensor output = createTensor(ctx, Shape{N}, kf32);
+
+  // dispatchKernel is handed the promise; wait() is handed the matching future.
+  std::promise<void> promise;
+  std::future<void> future = promise.get_future();
+  // 256 presumably fills {{workgroupSize}}; cdiv(N, 256) presumably rounds up.
+  Kernel op = createKernel(ctx, {kGelu, 256, kf32},
+                           Bindings{input, output},
+                           {cdiv(N, 256), 1, 1});
+  dispatchKernel(ctx, op, promise);
+  wait(ctx, future);
+  toCPU(ctx, output, outputArr.data(), sizeof(outputArr));
+  for (size_t i = 0; i < 12; ++i) {
+    printf(" gelu(%.2f) = %.2f\n", inputArr[i], outputArr[i]);
+  }
+  printf(" ...\n\n");
+  printf("Computed %zu values of GELU(x)\n\n", N);
+  return 0;
+}
diff --git a/experimental/fasthtml/run.py b/experimental/fasthtml/run.py
index d47af99..82054b3 100644
--- a/experimental/fasthtml/run.py
+++ b/experimental/fasthtml/run.py
@@ -7,6 +7,9 @@
TARGET = os.getenv("TARGET", "debug")
ace_editor = Script(src="https://cdnjs.cloudflare.com/ajax/libs/ace/1.4.12/ace.js")
+gpucpp_runtime = Script(src="/build/run.js")
+gpucpp_wasm = Script(src="/build/run.wasm")
+
global_style = Style("""
#editor {
height: 50vh;
@@ -17,6 +20,7 @@
HDRS = (
picolink,
ace_editor,
+ gpucpp_runtime,
global_style,
*Socials(
title="gpu.cpp gpu puzzles",
@@ -39,6 +43,10 @@
async def build(fname: str, ext: str):
return FileResponse(f"build/{fname}.{ext}")
+@app.get("/build/run.wasm")
+async def serve_wasm():  # fixed path: there are no {fname}/{ext} segments to bind
+    return FileResponse("build/run.wasm", media_type="application/wasm")
+
def page():
return Title("GPU Puzzles"), Body(
Div(