diff --git a/modules/dnn/src/op_webgpu.cpp b/modules/dnn/src/op_webgpu.cpp index 70c7288752cb..ddc876e2978e 100644 --- a/modules/dnn/src/op_webgpu.cpp +++ b/modules/dnn/src/op_webgpu.cpp @@ -20,7 +20,7 @@ void copyToMat(Mat &dst, webgpu::Tensor &src) CV_Assert(dst.type() == CV_32F); std::vector shape = src.getShape(); - void *data = const_cast(src.map() ); + void *data = const_cast(src.mapRead() ); Mat tmp(shape, CV_32F, data); tmp.copyTo(dst); src.unMap(); diff --git a/modules/dnn/src/webgpu/dawnAPITest/helloCompute.cpp b/modules/dnn/src/webgpu/dawnAPITest/helloCompute.cpp index c079a391db2f..1e6b93ad359e 100644 --- a/modules/dnn/src/webgpu/dawnAPITest/helloCompute.cpp +++ b/modules/dnn/src/webgpu/dawnAPITest/helloCompute.cpp @@ -183,7 +183,7 @@ wgpu::Device createCppDawnDevice() { return wgpu::Device::Acquire(backendDevice); } -wgpu::CreateBufferMappedResult createBufferMappedFromData( wgpu::Device& device, +wgpu::CreateBufferMappedResult createBufferMappedFromData(wgpu::Device& device, const void* data, size_t size, wgpu::BufferUsage usage){ diff --git a/modules/dnn/src/webgpu/dawnWrapperTest/CMakeLists.txt b/modules/dnn/src/webgpu/dawnWrapperTest/CMakeLists.txt index 9d2e434985d7..419bff402be9 100644 --- a/modules/dnn/src/webgpu/dawnWrapperTest/CMakeLists.txt +++ b/modules/dnn/src/webgpu/dawnWrapperTest/CMakeLists.txt @@ -20,7 +20,7 @@ file(GLOB DNN ${OPENCV_SOURCE_DIR}/modules/dnn/src/webgpu/dawn/*.cpp ) -add_executable( softmaxTest op_softmax_test.cpp +add_executable(softmaxTest op_softmax_test.cpp ${SOURCES} ${Dawn} ${DNN} diff --git a/modules/dnn/src/webgpu/dawnWrapperTest/op_softmax_test.cpp b/modules/dnn/src/webgpu/dawnWrapperTest/op_softmax_test.cpp index c30d4a7d24a7..60f92c2ec931 100644 --- a/modules/dnn/src/webgpu/dawnWrapperTest/op_softmax_test.cpp +++ b/modules/dnn/src/webgpu/dawnWrapperTest/op_softmax_test.cpp @@ -18,8 +18,7 @@ void printData(const void * data, int num) { } int main(int argc, char** argv ) { - webgpu::wDevice = std::make_shared(webgpu::createCppDawnDevice()); - webgpu::wQueue = std::make_shared(webgpu::wDevice->GetDefaultQueue()); + webgpu::isAvailable(); float inputData1[] = {1, 2, 3, 4, 5, 6, 7, 8}; std::vector shape = {2,4,1}; // outer_size * channels * channel_size diff --git a/modules/dnn/src/webgpu/include/buffer.hpp b/modules/dnn/src/webgpu/include/buffer.hpp index 654b5cda8b03..16471e573681 100644 --- a/modules/dnn/src/webgpu/include/buffer.hpp +++ b/modules/dnn/src/webgpu/include/buffer.hpp @@ -27,9 +27,9 @@ class Buffer wgpu::BufferUsage getBufferUsage() { return usage_;} static void BufferMapReadCallback(WGPUBufferMapAsyncStatus status, - const void* data, - uint64_t dataLength, - void* userdata) + const void* data, + uint64_t dataLength, + void* userdata) { static_cast(userdata)->mappedData = data; } diff --git a/modules/dnn/src/webgpu/include/op_base.hpp b/modules/dnn/src/webgpu/include/op_base.hpp index fb6d1e85a466..90ba108371e1 100644 --- a/modules/dnn/src/webgpu/include/op_base.hpp +++ b/modules/dnn/src/webgpu/include/op_base.hpp @@ -36,7 +36,7 @@ class OpBase void createCommandBuffer(); void runCommandBuffer(); wgpu::FenceCompletionStatus WaitForCompletedValue(wgpu::Fence fence, - uint64_t completedValue); + uint64_t completedValue); std::shared_ptr device_; wgpu::ComputePipeline pipeline_; diff --git a/modules/dnn/src/webgpu/include/tensor.hpp b/modules/dnn/src/webgpu/include/tensor.hpp index 273be406c4df..505fe5395a69 100644 --- a/modules/dnn/src/webgpu/include/tensor.hpp +++ b/modules/dnn/src/webgpu/include/tensor.hpp @@ -13,7 +13,7 @@ class Tensor{ public: Tensor(Format fmt = wFormatFp32); Tensor(const void* data, std::vector& shape, - Format fmt = wFormatFp32); + Format fmt = wFormatFp32); const void* mapRead(); void unMap(); Shape getShape() const; @@ -24,8 +24,8 @@ class Tensor{ // Copy data if data != NULL // Allocate new internal buffer if new size > old size or alloc flag is true Tensor reshape(const void* data, const std::vector& shape, - bool alloc = false, - Format fmt = wFormatInvalid); + bool alloc = false, + Format fmt = wFormatInvalid); Tensor fillData(const void * data); int getFormat() const; size_t size() const { return size_in_byte_; } @@ -38,7 +38,8 @@ class Tensor{ size_t size_in_byte_; std::shared_ptr buffer_; Format format_; - wgpu::BufferUsage usage_; + wgpu::BufferUsage usage_ = wgpu::BufferUsage::Storage | + wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; }; // #endif //HAVE_WEBGPU diff --git a/modules/dnn/src/webgpu/src/buffer.cpp b/modules/dnn/src/webgpu/src/buffer.cpp index 1637114ac62f..77fb4f4dcdaa 100644 --- a/modules/dnn/src/webgpu/src/buffer.cpp +++ b/modules/dnn/src/webgpu/src/buffer.cpp @@ -7,13 +7,14 @@ namespace cv { namespace dnn { namespace webgpu { Buffer::Buffer(std::shared_ptr device) { - device_ = device; - usage_ = wgpu::BufferUsage::Storage; + device_ = device; + usage_ = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopyDst | + wgpu::BufferUsage::CopySrc; } Buffer::Buffer(std::shared_ptr device, - const void* data, size_t size, - wgpu::BufferUsage usage) + const void* data, size_t size, + wgpu::BufferUsage usage) { device_ = device; usage_ = usage; @@ -26,7 +27,7 @@ Buffer::Buffer(std::shared_ptr device, } Buffer::Buffer(const void* data, size_t size, - wgpu::BufferUsage usage) + wgpu::BufferUsage usage) { createContext(); device_ = wDevice; @@ -55,8 +56,8 @@ const void* Buffer::MapReadAsyncAndWait() gpuReadBuffer_ = device_->CreateBuffer(& desc); } wgpu::CommandEncoder encoder = device_->CreateCommandEncoder(); - encoder.CopyBufferToBuffer( buffer_, 0, - gpuReadBuffer_, 0, size_); + encoder.CopyBufferToBuffer(buffer_, 0, + gpuReadBuffer_, 0, size_); wgpu::CommandBuffer cmdBuffer = encoder.Finish(); encoder.Release(); wQueue->Submit(1, &cmdBuffer); diff --git a/modules/dnn/src/webgpu/src/context.cpp b/modules/dnn/src/webgpu/src/context.cpp index a5f6e088ff04..8791e9fa6525 100644 --- a/modules/dnn/src/webgpu/src/context.cpp +++ b/modules/dnn/src/webgpu/src/context.cpp @@ -39,13 +39,11 @@ bool isAvailable() } Context::Context() { - // create wgpu::Device wDevice = std::make_shared(createCppDawnDevice()); wQueue = std::make_shared(wDevice->GetDefaultQueue()); } Context::~Context() { - // how to release object wDevice->Release(); wQueue->Release(); } diff --git a/modules/dnn/src/webgpu/src/context.hpp b/modules/dnn/src/webgpu/src/context.hpp index 046f25fd1107..1588fab79192 100644 --- a/modules/dnn/src/webgpu/src/context.hpp +++ b/modules/dnn/src/webgpu/src/context.hpp @@ -9,10 +9,6 @@ class Context public: Context(); ~Context(); -std::shared_ptr wCtx; -std::shared_ptr wDevice = nullptr; -std::shared_ptr wQueue = nullptr; -cv::Mutex wContextMtx; }; void createContext(); diff --git a/modules/dnn/src/webgpu/src/internal.cpp b/modules/dnn/src/webgpu/src/internal.cpp index e2bee3e09ed8..a888810de1a5 100644 --- a/modules/dnn/src/webgpu/src/internal.cpp +++ b/modules/dnn/src/webgpu/src/internal.cpp @@ -58,7 +58,7 @@ void bindTensor(Tensor& tensor, uint32_t binding, } void bindUniform(Buffer& buffer, uint32_t binding, - std::vector& bgEntries) + std::vector& bgEntries) { wgpu::BindGroupEntry bgEntry = {}; bgEntry.binding = binding; @@ -106,11 +106,11 @@ void computeConvOutputShapeAndPadding(const PaddingMode& padding_mode, } void computePoolOutputShape(const PaddingMode& padding_mode, - const int& padding_top, const int& padding_left, - const int& in_h, const int& in_w, - const int& filter_h, const int& filter_w, - const int& stride_h, const int& stride_w, - int& out_h, int& out_w) + const int& padding_top, const int& padding_left, + const int& in_h, const int& in_w, + const int& filter_h, const int& filter_w, + const int& stride_h, const int& stride_w, + int& out_h, int& out_w) { if (padding_mode == wPaddingModeValid) { diff --git a/modules/dnn/src/webgpu/src/internal.hpp b/modules/dnn/src/webgpu/src/internal.hpp index 64ea77235383..94b1bf316fb3 100644 --- a/modules/dnn/src/webgpu/src/internal.hpp +++ b/modules/dnn/src/webgpu/src/internal.hpp @@ -21,7 +21,7 @@ std::vector compile(const std::string& name, void bindTensor(Tensor& tensor, uint32_t binding, std::vector& bgEntries); void bindUniform(Buffer& buffer, uint32_t binding, - std::vector& bgEntries); + std::vector& bgEntries); void computeConvOutputShapeAndPadding(const PaddingMode& padding_mode, int& padding_top, int& padding_left, const int& in_h, const int& in_w, diff --git a/modules/dnn/src/webgpu/src/tensor.cpp b/modules/dnn/src/webgpu/src/tensor.cpp index a92f1a40f7db..f8b4094c7c96 100644 --- a/modules/dnn/src/webgpu/src/tensor.cpp +++ b/modules/dnn/src/webgpu/src/tensor.cpp @@ -15,7 +15,7 @@ Tensor::Tensor(const void* data, std::vector& shape, Format fmt) createContext(); device_ = wDevice; size_in_byte_ = 0; - usage_ = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopySrc | wgpu::BufferUsage::CopyDst; + usage_ = wgpu::BufferUsage::Storage | wgpu::BufferUsage::CopyDst | wgpu::BufferUsage::CopySrc; format_ = fmt; reshape(data, shape); } @@ -54,7 +54,7 @@ int Tensor::dimNum() const } Tensor Tensor::reshape(const void* data, const std::vector& shape, - bool alloc, Format fmt) + bool alloc, Format fmt) { if (device_ == nullptr) { @@ -76,6 +76,7 @@ Tensor Tensor::reshape(const void* data, const std::vector& shape, return * this; } fillData(data); + return * this; } Tensor Tensor::fillData(const void * data) diff --git a/modules/dnn/test/test_common.impl.hpp b/modules/dnn/test/test_common.impl.hpp index b33b8e40d9e7..9293e07f0ea6 100644 --- a/modules/dnn/test/test_common.impl.hpp +++ b/modules/dnn/test/test_common.impl.hpp @@ -25,6 +25,7 @@ void PrintTo(const cv::dnn::Backend& v, std::ostream* os) case DNN_BACKEND_HALIDE: *os << "HALIDE"; return; case DNN_BACKEND_INFERENCE_ENGINE: *os << "DLIE*"; return; case DNN_BACKEND_VKCOM: *os << "VKCOM"; return; + case DNN_BACKEND_WGPU: *os << "WGPU"; return; case DNN_BACKEND_OPENCV: *os << "OCV"; return; case DNN_BACKEND_CUDA: *os << "CUDA"; return; case DNN_BACKEND_INFERENCE_ENGINE_NN_BUILDER_2019: *os << "DLIE"; return; @@ -41,6 +42,7 @@ void PrintTo(const cv::dnn::Target& v, std::ostream* os) case DNN_TARGET_OPENCL_FP16: *os << "OCL_FP16"; return; case DNN_TARGET_MYRIAD: *os << "MYRIAD"; return; case DNN_TARGET_VULKAN: *os << "VULKAN"; return; + case DNN_TARGET_WGPU: *os << "WGPU"; return; case DNN_TARGET_FPGA: *os << "FPGA"; return; case DNN_TARGET_CUDA: *os << "CUDA"; return; case DNN_TARGET_CUDA_FP16: *os << "CUDA_FP16"; return;