diff --git a/examples/hello_world/run.cpp b/examples/hello_world/run.cpp
index 0c349d3..44c787e 100644
--- a/examples/hello_world/run.cpp
+++ b/examples/hello_world/run.cpp
@@ -1,6 +1,5 @@
 #include "gpu.h"
 #include "nn/shaders.h"
-#include "utils/logging.h"
 #include <array>
 #include <cstdio>
 
@@ -24,7 +23,6 @@ fn main(
 )";
 
 int main(int argc, char **argv) {
-  log(kDefLog, kInfo, "Hello, gpu.cpp!");
   GPUContext ctx = CreateContext();
   fprintf(stdout, "\nHello, gpu.cpp\n\n");
   static constexpr size_t N = 3072;
diff --git a/gpu.h b/gpu.h
index ff96aec..3e78b3d 100644
--- a/gpu.h
+++ b/gpu.h
@@ -84,7 +84,7 @@ template <std::size_t N> GPUTensors(std::array<GPUTensor, N>) -> GPUTensors<N>;
 template <typename... Args> GPUTensors(Args...) -> GPUTensors<sizeof...(Args)>;
 
 struct TensorPool {
-  TensorPool(GPUContext *ctx) : ctx(ctx), data() {};
+  TensorPool(GPUContext *ctx) : ctx(ctx), data(){};
   GPUContext *ctx;
   std::unordered_map<WGPUBuffer, GPUTensor> data;
   ~TensorPool();
@@ -140,7 +140,7 @@ struct Kernel {
   size_t outputSize;
   size_t numBuffers;
   size_t numInputs;
-  WGPUCommandBuffer commandBuffer; // destroyed upon submission
+  WGPUCommandBuffer commandBuffer;     // destroyed upon submission
   WGPUComputePipeline computePipeline; // persists between submission
   WGPUBuffer readbackBuffer;
   CallbackDataDyn callbackData;
@@ -175,7 +175,8 @@ struct MultiKernel {
                                        // paramSizes = 0 means no params buffer
   std::unique_ptr<size_t[]> numInputs; // length = numShaders
   WGPUCommandBuffer commandBuffer;     // All kernels in the pipeline
-  WGPUComputePipeline computePipeline; // TODO(avh): decide how to handle compute pipelines for multikernel
+  WGPUComputePipeline computePipeline; // TODO(avh): decide how to handle
+                                       // compute pipelines for multikernel
   WGPUBuffer readbackBuffer; // Readback buffer for the final output buffer
   CallbackDataDyn callbackData;
   std::promise<void> promise;
@@ -342,17 +343,6 @@ inline void check(bool condition, const char *message,
   }
 }
 
-void showDeviceInfo(WGPUAdapter &adapter) {
-  WGPUAdapterProperties properties;
-  wgpuAdapterGetProperties(adapter, &properties);
-  printf("Device Name: %s\n", properties.name);
-  printf("Vendor ID: %u\n", properties.vendorID);
-  printf("Device ID: %u\n", properties.deviceID);
-  WGPULimits limits;
-  WGPUSupportedLimits supportedLimits;
-  wgpuAdapterGetLimits(adapter, &supportedLimits);
-}
-
 GPUContext CreateContext(bool quietLogging = true,
                          const WGPUInstanceDescriptor &desc = {},
                          const WGPURequestAdapterOptions &adapterOpts = {},
@@ -525,13 +515,7 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
                     const GPUTensor *inputs, size_t numInputs,
                     const GPUTensor &output, const void *params,
                     size_t paramsSize, Shape nThreads) {
-  if (nThreads.rank < 3) {
-    const size_t rank = nThreads.rank;
-    nThreads.rank = 3;
-    for (size_t i = rank; i < 3; i++) {
-      nThreads[i] = 1;
-    }
-  }
+  assert(nThreads.rank == 3);
   WGPUDevice device = ctx.device;
   WGPUQueue queue = ctx.queue;
   Kernel op;
@@ -727,16 +711,19 @@ Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
   return op;
 }
 
-// default nThreads to output.shape
 Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
                     const GPUTensor *inputs, size_t numInputs,
                     const GPUTensor &output, const void *params = nullptr,
                     size_t paramsSize = 0) {
+  Shape nThreads = output.shape;
+  nThreads.rank = 3;
+  for (size_t i = output.shape.rank; i < 3; i++) {
+    nThreads[i] = 1;
+  }
   return CreateKernel(ctx, shader, inputs, numInputs, output, params,
-                      paramsSize, output.shape);
+                      paramsSize, nThreads);
 }
 
-// comptime template for paramtype - is this needed?
 template <typename ParamsType>
 Kernel CreateKernel(GPUContext &ctx, const ShaderCode &shader,
                     const GPUTensor *inputs, size_t numInputs,
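
Note on the CreateKernel change above: the rank-padding logic moves out of the
Shape-taking overload (which now asserts nThreads.rank == 3) and into the
default overload, which pads output.shape with trailing 1s up to rank 3 before
delegating. A minimal sketch of the two call paths, assuming ctx, shader, and
tensors x and y are set up as in the hello_world example, and assuming Shape
can be brace-initialized from its dimensions (tensor names are hypothetical):

  // Default overload: a rank-1 output of shape {N} is padded to {N, 1, 1}
  // internally before dispatch.
  Kernel k1 = CreateKernel(ctx, shader, &x, 1, y);

  // Shape-taking overload: the caller must now pass a rank-3 Shape explicitly;
  // a lower-rank Shape trips the assert instead of being padded silently.
  Kernel k2 = CreateKernel(ctx, shader, &x, 1, y,
                           /*params=*/nullptr, /*paramsSize=*/0,
                           /*nThreads=*/{N, 1, 1});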