From e1b8ce4e360c64f6d8e81230b535c396d7d18833 Mon Sep 17 00:00:00 2001 From: Yulong Wang <7679871+fs-eire@users.noreply.github.com> Date: Tue, 8 Oct 2024 16:10:46 -0700 Subject: [PATCH] Initial WebGPU EP checkin (#22318) This change introduces the WebGPU EP into ONNX Runtime. To make the PR as simple as possible, this PR excludes the following: - C API changes for WebGPU EP - actual implementation of WebGPU EP. Currently in this PR, WebGPU is a stub implementation that does not register any kernel. - Python IO Binding update - Node.js IO Binding update This PR now contains only 43 file changes (while the working branch contains 130+) and hopefully this makes it easier to review. There will be separate PRs for each item mentioned above. Current working branch: #21904 --- cmake/CMakeLists.txt | 22 ++++ .../external/onnxruntime_external_deps.cmake | 82 ++++++++++--- cmake/onnxruntime.cmake | 67 ++++++++++- cmake/onnxruntime_nodejs.cmake | 5 +- cmake/onnxruntime_objectivec.cmake | 14 --- cmake/onnxruntime_providers.cmake | 7 ++ cmake/onnxruntime_providers_cpu.cmake | 5 + cmake/onnxruntime_providers_webgpu.cmake | 27 +++++ cmake/onnxruntime_python.cmake | 1 + cmake/onnxruntime_unittests.cmake | 12 ++ cmake/patches/dawn/dawn.patch | 66 +++++++++++ include/onnxruntime/core/graph/constants.h | 1 + .../webgpu/webgpu_provider_factory.h | 14 +++ .../main/java/ai/onnxruntime/OrtProvider.java | 4 +- .../webgpu/webgpu_contrib_kernels.cc | 34 ++++++ .../webgpu/webgpu_contrib_kernels.h | 20 ++++ .../platform/apple/logging/apple_log_sink.mm | 2 - .../core/providers/get_execution_providers.cc | 8 ++ .../providers/provider_factory_creators.h | 4 + onnxruntime/core/providers/webgpu/symbols.txt | 0 .../webgpu/webgpu_execution_provider.cc | 108 ++++++++++++++++++ .../webgpu/webgpu_execution_provider.h | 37 ++++++ .../webgpu/webgpu_provider_factory.cc | 24 ++++ .../webgpu/webgpu_provider_factory_creator.h | 18 +++ onnxruntime/core/session/inference_session.cc | 8 +- 
.../core/session/provider_registration.cc | 6 + .../python/onnxruntime_pybind_state.cc | 4 + onnxruntime/test/onnx/main.cc | 62 +++++++++- .../test/perftest/command_args_parser.cc | 6 +- onnxruntime/test/perftest/ort_test_session.cc | 7 ++ .../ios_package_uitest_cpp_api.mm | 23 +++- .../macos_package_uitest_cpp_api.mm | 24 +++- onnxruntime/test/providers/base_tester.cc | 3 + .../providers/compare_provider_test_utils.cc | 2 + onnxruntime/test/util/default_providers.cc | 9 ++ .../test/util/include/default_providers.h | 1 + tools/ci_build/build.py | 5 + tools/ci_build/gen_def.py | 1 + .../apple/build_and_assemble_apple_pods.py | 2 + .../win-gpu-webgpu-ci-pipeline.yml | 58 ++++++++++ tools/ci_build/set-trigger-rules.py | 1 + 41 files changed, 748 insertions(+), 56 deletions(-) create mode 100644 cmake/onnxruntime_providers_webgpu.cmake create mode 100644 cmake/patches/dawn/dawn.patch create mode 100644 include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h create mode 100644 onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.cc create mode 100644 onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.h create mode 100644 onnxruntime/core/providers/webgpu/symbols.txt create mode 100644 onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc create mode 100644 onnxruntime/core/providers/webgpu/webgpu_execution_provider.h create mode 100644 onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc create mode 100644 onnxruntime/core/providers/webgpu/webgpu_provider_factory_creator.h create mode 100644 tools/ci_build/github/azure-pipelines/win-gpu-webgpu-ci-pipeline.yml diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt index d56b1cd3f5201..ec536205b228e 100644 --- a/cmake/CMakeLists.txt +++ b/cmake/CMakeLists.txt @@ -148,6 +148,7 @@ option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llv option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. 
It is defined for TVM only") option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF) option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF) +option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF) # Options related to reducing the binary size produced by the build # XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON @@ -490,6 +491,22 @@ if (onnxruntime_BUILD_CSHARP) endif() endif() +if (onnxruntime_BUILD_OBJC) + check_language(OBJC) + if(CMAKE_OBJC_COMPILER) + enable_language(OBJC) + else() + message(FATAL_ERROR "Objective-C is not supported.") + endif() + + check_language(OBJCXX) + if(CMAKE_OBJCXX_COMPILER) + enable_language(OBJCXX) + else() + message(FATAL_ERROR "Objective-C++ is not supported.") + endif() +endif() + if (NOT WIN32) #TODO: On Linux we may try https://github.com/microsoft/TraceLogging.git if (onnxruntime_ENABLE_INSTRUMENT) @@ -917,6 +934,11 @@ if (onnxruntime_USE_WEBNN) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1) list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn) endif() +if (onnxruntime_USE_WEBGPU) + list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1) + list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1) + list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu) +endif() if (onnxruntime_USE_CANN) list(APPEND ORT_PROVIDER_FLAGS -DUSE_CANN=1) list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1) diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake index cb737ee53639f..85746027d4e8c 100644 --- a/cmake/external/onnxruntime_external_deps.cmake +++ b/cmake/external/onnxruntime_external_deps.cmake @@ -576,10 +576,11 @@ if (onnxruntime_USE_MIMALLOC) onnxruntime_fetchcontent_makeavailable(mimalloc) endif() 
-#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn, -# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread -# pthread is always at the last -set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME}) +set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json + onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface + flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date + ${ONNXRUNTIME_CLOG_TARGET_NAME}) + # The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types. # The other libs do not have the problem. All the sources are already there. We can compile them in any order. 
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers) @@ -634,24 +635,73 @@ if (onnxruntime_USE_COREML) FetchContent_Populate(coremltools) endif() -message(STATUS "Finished fetching external dependencies") +if (onnxruntime_USE_WEBGPU) + FetchContent_Declare( + dawn + URL ${DEP_URL_dawn} + URL_HASH SHA1=${DEP_SHA1_dawn} + PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch + ) -set(onnxruntime_LINK_DIRS ) + # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size + set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) + set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE) + set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE) + + # disable things we don't use + set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF) + set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE) + set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE) + set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE) + set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE) + set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE) + set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE) + + set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE) + set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving + set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled. + + # SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V. + set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE) + + if (WIN32) + # building this requires the HLSL writer to be enabled in Tint. 
TBD if that we need either of these to be ON. + set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE) + set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE) + + # Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it. + set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE) + endif() + + onnxruntime_fetchcontent_makeavailable(dawn) + list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc) +endif() + +set(onnxruntime_LINK_DIRS) if (onnxruntime_USE_CUDA) - find_package(CUDAToolkit REQUIRED) + find_package(CUDAToolkit REQUIRED) - if(onnxruntime_CUDNN_HOME) - file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME) - set(CUDNN_PATH ${onnxruntime_CUDNN_HOME}) - endif() - include(cuDNN) + if(onnxruntime_CUDNN_HOME) + file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME) + set(CUDNN_PATH ${onnxruntime_CUDNN_HOME}) + endif() + + include(cuDNN) endif() if(onnxruntime_USE_SNPE) - include(external/find_snpe.cmake) - list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS}) + include(external/find_snpe.cmake) + list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS}) endif() -FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) -FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) +FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR) +FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR) + +message(STATUS "Finished fetching external dependencies") diff --git a/cmake/onnxruntime.cmake b/cmake/onnxruntime.cmake index 5ce5d95541d8f..c5c991d66878e 100644 --- a/cmake/onnxruntime.cmake +++ b/cmake/onnxruntime.cmake @@ -90,10 +90,22 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK) # create Info.plist for the framework and podspec for CocoaPods (optional) set(MACOSX_FRAMEWORK_NAME "onnxruntime") set(MACOSX_FRAMEWORK_IDENTIFIER "com.microsoft.onnxruntime") - # Need to include CoreML as a weaklink for CocoaPods package if the EP is enabled + + # Setup weak frameworks for 
macOS/iOS. 'weak' as the CoreML or WebGPU EPs are optionally enabled. if(onnxruntime_USE_COREML) - set(APPLE_WEAK_FRAMEWORK "\\\"CoreML\\\"") + list(APPEND _weak_frameworks "\\\"CoreML\\\"") + endif() + + if(onnxruntime_USE_WEBGPU) + list(APPEND _weak_frameworks "\\\"QuartzCore\\\"") + list(APPEND _weak_frameworks "\\\"IOSurface\\\"") + list(APPEND _weak_frameworks "\\\"Metal\\\"") endif() + + if (_weak_frameworks) + string(JOIN ", " APPLE_WEAK_FRAMEWORK ${_weak_frameworks}) + endif() + set(INFO_PLIST_PATH "${CMAKE_CURRENT_BINARY_DIR}/Info.plist") configure_file(${REPO_ROOT}/cmake/Info.plist.in ${INFO_PLIST_PATH}) configure_file( @@ -202,6 +214,7 @@ set(onnxruntime_INTERNAL_LIBRARIES ${PROVIDERS_RKNPU} ${PROVIDERS_VSINPU} ${PROVIDERS_XNNPACK} + ${PROVIDERS_WEBGPU} ${PROVIDERS_WEBNN} ${PROVIDERS_AZURE} ${PROVIDERS_INTERNAL_TESTING} @@ -366,16 +379,58 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK) endif() endforeach() + # helper function that recurses to also handle static library dependencies of the ORT external libraries + set(_processed_libs) # keep track of processed libraries to skip any duplicate dependencies + function(add_symlink_for_static_lib_and_dependencies lib) + function(process cur_target) + # de-alias if applicable so a consistent target name is used + get_target_property(alias ${cur_target} ALIASED_TARGET) + if(TARGET ${alias}) + set(cur_target ${alias}) + endif() + + if(${cur_target} IN_LIST _processed_libs OR ${cur_target} IN_LIST lib_and_dependencies) + return() + endif() + + list(APPEND lib_and_dependencies ${cur_target}) + + get_target_property(link_libraries ${cur_target} LINK_LIBRARIES) + foreach(dependency ${link_libraries}) + if(TARGET ${dependency}) + process(${dependency}) + endif() + endforeach() + + set(lib_and_dependencies ${lib_and_dependencies} PARENT_SCOPE) + endfunction() + + set(lib_and_dependencies) + process(${lib}) + + foreach(_target ${lib_and_dependencies}) + get_target_property(type ${_target} TYPE) + if(${type} STREQUAL 
"STATIC_LIBRARY") + # message(STATUS "Adding symlink for ${_target}") + add_custom_command(TARGET onnxruntime POST_BUILD + COMMAND ${CMAKE_COMMAND} -E create_symlink + $ ${STATIC_LIB_DIR}/$) + endif() + endforeach() + + list(APPEND _processed_libs ${lib_and_dependencies}) + set(_processed_libs ${_processed_libs} PARENT_SCOPE) + endfunction() + # for external libraries we create a symlink to the .a file foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES}) - if(NOT TARGET ${_LIB}) # if we didn't build from source. it may not a target + if(NOT TARGET ${_LIB}) # if we didn't build from source it may not be a target continue() endif() + GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE) if(_LIB_TYPE STREQUAL "STATIC_LIBRARY") - add_custom_command(TARGET onnxruntime POST_BUILD - COMMAND ${CMAKE_COMMAND} -E create_symlink - $ ${STATIC_LIB_DIR}/$) + add_symlink_for_static_lib_and_dependencies(${_LIB}) endif() endforeach() diff --git a/cmake/onnxruntime_nodejs.cmake b/cmake/onnxruntime_nodejs.cmake index f11928c11cf14..376d895be34a9 100644 --- a/cmake/onnxruntime_nodejs.cmake +++ b/cmake/onnxruntime_nodejs.cmake @@ -67,6 +67,9 @@ endif() if (onnxruntime_USE_DML) set(NODEJS_BINDING_USE_DML "--use_dml") endif() +if (onnxruntime_USE_WEBGPU) + set(NODEJS_BINDING_USE_WEBGPU "--use_webgpu") +endif() if (onnxruntime_USE_TENSORRT) set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt") endif() @@ -92,7 +95,7 @@ add_custom_target(js_common_npm_ci ALL add_custom_target(nodejs_binding_wrapper ALL COMMAND ${NPM_CLI} ci COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR} - --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT} + --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU} ${NODEJS_BINDING_USE_TENSORRT} ${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN} WORKING_DIRECTORY 
${JS_NODE_ROOT} COMMENT "Using cmake-js to build OnnxRuntime Node.js binding") diff --git a/cmake/onnxruntime_objectivec.cmake b/cmake/onnxruntime_objectivec.cmake index 4be2f51a96ebc..7c9831f0194d0 100644 --- a/cmake/onnxruntime_objectivec.cmake +++ b/cmake/onnxruntime_objectivec.cmake @@ -9,20 +9,6 @@ if(NOT onnxruntime_BUILD_SHARED_LIB) message(FATAL_ERROR "The Objective-C API requires onnxruntime_BUILD_SHARED_LIB to be enabled.") endif() -check_language(OBJC) -if(CMAKE_OBJC_COMPILER) - enable_language(OBJC) -else() - message(FATAL_ERROR "Objective-C is not supported.") -endif() - -check_language(OBJCXX) -if(CMAKE_OBJCXX_COMPILER) - enable_language(OBJCXX) -else() - message(FATAL_ERROR "Objective-C++ is not supported.") -endif() - add_compile_options( "$<$:-Wall>" "$<$:-Wextra>") diff --git a/cmake/onnxruntime_providers.cmake b/cmake/onnxruntime_providers.cmake index 6bae330c8b4c1..0df84854a20c0 100644 --- a/cmake/onnxruntime_providers.cmake +++ b/cmake/onnxruntime_providers.cmake @@ -114,6 +114,9 @@ endif() if(onnxruntime_USE_WEBNN) set(PROVIDERS_WEBNN onnxruntime_providers_webnn) endif() +if(onnxruntime_USE_WEBGPU) + set(PROVIDERS_WEBGPU onnxruntime_providers_webgpu) +endif() if (onnxruntime_USE_CANN) set(PROVIDERS_CANN onnxruntime_providers_cann) endif() @@ -155,6 +158,10 @@ if (onnxruntime_USE_WEBNN) include(onnxruntime_providers_webnn.cmake) endif() +if (onnxruntime_USE_WEBGPU) + include(onnxruntime_providers_webgpu.cmake) +endif() + if (onnxruntime_USE_NNAPI_BUILTIN) include(onnxruntime_providers_nnapi.cmake) endif() diff --git a/cmake/onnxruntime_providers_cpu.cmake b/cmake/onnxruntime_providers_cpu.cmake index 295a8bbca70f7..91a2b13002ec9 100644 --- a/cmake/onnxruntime_providers_cpu.cmake +++ b/cmake/onnxruntime_providers_cpu.cmake @@ -40,6 +40,11 @@ file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc" ) +file(GLOB_RECURSE onnxruntime_webgpu_contrib_ops_cc_srcs CONFIGURE_DEPENDS + 
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.h" + "${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.cc" +) + file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS "${ONNXRUNTIME_ROOT}/core/providers/*.h" "${ONNXRUNTIME_ROOT}/core/providers/*.cc" diff --git a/cmake/onnxruntime_providers_webgpu.cmake b/cmake/onnxruntime_providers_webgpu.cmake new file mode 100644 index 0000000000000..eb25c55ab23e0 --- /dev/null +++ b/cmake/onnxruntime_providers_webgpu.cmake @@ -0,0 +1,27 @@ +# Copyright (c) Microsoft Corporation. All rights reserved. +# Licensed under the MIT License. + + if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD) + message(FATAL_ERROR "WebGPU EP can not be used in a basic minimal build. Please build with '--minimal_build extended'") + endif() + + add_compile_definitions(USE_WEBGPU=1) + if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS) + add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1) + endif() + file(GLOB_RECURSE onnxruntime_providers_webgpu_cc_srcs CONFIGURE_DEPENDS + "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.h" + "${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.cc" + ) + if(NOT onnxruntime_DISABLE_CONTRIB_OPS) + source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_webgpu_contrib_ops_cc_srcs}) + list(APPEND onnxruntime_providers_webgpu_cc_srcs ${onnxruntime_webgpu_contrib_ops_cc_srcs}) + endif() + + source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs}) + onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs}) + onnxruntime_add_include_to_target(onnxruntime_providers_webgpu + onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface) + target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc) + + set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime") diff --git a/cmake/onnxruntime_python.cmake b/cmake/onnxruntime_python.cmake index 
cb69886ce671a..0d038d210ea2b 100644 --- a/cmake/onnxruntime_python.cmake +++ b/cmake/onnxruntime_python.cmake @@ -178,6 +178,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE ${PROVIDERS_ACL} ${PROVIDERS_ARMNN} ${PROVIDERS_XNNPACK} + ${PROVIDERS_WEBGPU} ${PROVIDERS_AZURE} ${PROVIDERS_QNN} onnxruntime_optimizer diff --git a/cmake/onnxruntime_unittests.cmake b/cmake/onnxruntime_unittests.cmake index 7fbedb6059621..e148215200e4f 100644 --- a/cmake/onnxruntime_unittests.cmake +++ b/cmake/onnxruntime_unittests.cmake @@ -563,6 +563,10 @@ if(onnxruntime_USE_JSEP) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js) endif() +if(onnxruntime_USE_WEBGPU) + list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu) +endif() + if(onnxruntime_USE_RKNPU) list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu) endif() @@ -608,6 +612,7 @@ set(ONNXRUNTIME_TEST_LIBS ${PROVIDERS_NNAPI} ${PROVIDERS_VSINPU} ${PROVIDERS_JS} + ${PROVIDERS_WEBGPU} ${PROVIDERS_QNN} ${PROVIDERS_SNPE} ${PROVIDERS_RKNPU} @@ -670,6 +675,13 @@ if(onnxruntime_USE_JSEP) list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js) endif() +if(onnxruntime_USE_WEBGPU) + list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/webgpu/*) + list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_webgpu) + list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu) + list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_webgpu) +endif() + # QNN EP tests require CPU EP op implementations for accuracy evaluation, so disable on minimal # or reduced op builds. 
if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD) diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch new file mode 100644 index 0000000000000..d696d386452e8 --- /dev/null +++ b/cmake/patches/dawn/dawn.patch @@ -0,0 +1,66 @@ +diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt +index 9c0bd6fa4e..bf8a57aeac 100644 +--- a/src/dawn/native/CMakeLists.txt ++++ b/src/dawn/native/CMakeLists.txt +@@ -857,6 +857,11 @@ if (DAWN_ENABLE_SWIFTSHADER) + target_compile_definitions(dawn_native PRIVATE "DAWN_ENABLE_SWIFTSHADER") + endif() + ++if (IOS) ++ target_compile_options(dawn_native_objects PRIVATE -fno-objc-arc) ++ target_compile_options(dawn_native PRIVATE -fno-objc-arc) ++endif() ++ + if (DAWN_BUILD_MONOLITHIC_LIBRARY) + ############################################################################### + # Do the 'complete_lib' build. +diff --git a/src/dawn/native/Surface_metal.mm b/src/dawn/native/Surface_metal.mm +index ce55acbd43..baa4835362 100644 +--- a/src/dawn/native/Surface_metal.mm ++++ b/src/dawn/native/Surface_metal.mm +@@ -36,7 +36,13 @@ + namespace dawn::native { + + bool InheritsFromCAMetalLayer(void* obj) { +- id object = static_cast(obj); ++ id object = ++#if TARGET_OS_IOS ++ (__bridge id)obj; ++#else ++ static_cast(obj); ++#endif ++ + return [object isKindOfClass:[CAMetalLayer class]]; + } + +diff --git a/src/dawn/native/metal/SharedFenceMTL.mm b/src/dawn/native/metal/SharedFenceMTL.mm +index bde8bfea07..f2f6459e91 100644 +--- a/src/dawn/native/metal/SharedFenceMTL.mm ++++ b/src/dawn/native/metal/SharedFenceMTL.mm +@@ -40,7 +40,13 @@ ResultOrError> SharedFence::Create( + DAWN_INVALID_IF(descriptor->sharedEvent == nullptr, "MTLSharedEvent is missing."); + if (@available(macOS 10.14, iOS 12.0, *)) { + return AcquireRef(new SharedFence( +- device, label, static_cast>(descriptor->sharedEvent))); ++ device, label, ++#if TARGET_OS_IOS ++ (__bridge 
id)(descriptor->sharedEvent) ++#else ++ static_cast>(descriptor->sharedEvent) ++#endif ++ )); + } else { + return DAWN_INTERNAL_ERROR("MTLSharedEvent not supported."); + } +diff --git a/src/tint/api/BUILD.cmake b/src/tint/api/BUILD.cmake +index 0037d83276..6372c4ee77 100644 +--- a/src/tint/api/BUILD.cmake ++++ b/src/tint/api/BUILD.cmake +@@ -57,6 +57,7 @@ tint_target_add_dependencies(tint_api lib + tint_lang_wgsl_ast_transform + tint_lang_wgsl_common + tint_lang_wgsl_features ++ tint_lang_wgsl_inspector + tint_lang_wgsl_program + tint_lang_wgsl_sem + tint_lang_wgsl_writer_ir_to_program diff --git a/include/onnxruntime/core/graph/constants.h b/include/onnxruntime/core/graph/constants.h index f76fae360a9f9..8b3e5e48e7004 100644 --- a/include/onnxruntime/core/graph/constants.h +++ b/include/onnxruntime/core/graph/constants.h @@ -51,6 +51,7 @@ constexpr const char* kSnpeExecutionProvider = "SNPEExecutionProvider"; constexpr const char* kTvmExecutionProvider = "TvmExecutionProvider"; constexpr const char* kXnnpackExecutionProvider = "XnnpackExecutionProvider"; constexpr const char* kWebNNExecutionProvider = "WebNNExecutionProvider"; +constexpr const char* kWebGpuExecutionProvider = "WebGpuExecutionProvider"; constexpr const char* kCannExecutionProvider = "CANNExecutionProvider"; constexpr const char* kAzureExecutionProvider = "AzureExecutionProvider"; constexpr const char* kVSINPUExecutionProvider = "VSINPUExecutionProvider"; diff --git a/include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h b/include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h new file mode 100644 index 0000000000000..0b45b847d651f --- /dev/null +++ b/include/onnxruntime/core/providers/webgpu/webgpu_provider_factory.h @@ -0,0 +1,14 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +// Dummy file to provide a signal in the ONNX Runtime C cocoapod as to whether the WebGPU EP was included in the build. 
+// If it was, this file will be included in the cocoapod, and a test like this can be used: +// +// #if __has_include(<onnxruntime/webgpu_provider_factory.h>) +// #define WEBGPU_EP_AVAILABLE 1 +// #else +// #define WEBGPU_EP_AVAILABLE 0 +// #endif + +// The WebGPU EP can be enabled via the generic SessionOptionsAppendExecutionProvider method, so no direct usage of +// the provider factory is required. diff --git a/java/src/main/java/ai/onnxruntime/OrtProvider.java b/java/src/main/java/ai/onnxruntime/OrtProvider.java index 0e2883fe23088..1740ac7eeef00 100644 --- a/java/src/main/java/ai/onnxruntime/OrtProvider.java +++ b/java/src/main/java/ai/onnxruntime/OrtProvider.java @@ -42,7 +42,9 @@ public enum OrtProvider { /** The Azure remote endpoint execution provider. */ AZURE("AzureExecutionProvider"), /** The QNN execution provider. */ - QNN("QNNExecutionProvider"); + QNN("QNNExecutionProvider"), + /** The WebGPU execution provider. */ + WEBGPU("WebGpuExecutionProvider"); private static final Map valueMap = new HashMap<>(values().length); diff --git a/onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.cc b/onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.cc new file mode 100644 index 0000000000000..8ed1372cd0e62 --- /dev/null +++ b/onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.cc @@ -0,0 +1,34 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
+ +#include "contrib_ops/webgpu/webgpu_contrib_kernels.h" + +#include "core/framework/op_kernel.h" + +namespace onnxruntime { +namespace contrib { +namespace webgpu { + +template <> +KernelCreateInfo BuildKernelCreateInfo() { + KernelCreateInfo info; + return info; +} + +Status RegisterWebGpuContribKernels(KernelRegistry& kernel_registry) { + static const BuildKernelCreateInfoFn function_table[] = { + BuildKernelCreateInfo, // default entry to avoid the list becoming empty after ops-reducing + }; + + for (auto& function_table_entry : function_table) { + KernelCreateInfo info = function_table_entry(); + if (info.kernel_def != nullptr) { // filter disabled entries where type is void + ORT_RETURN_IF_ERROR(kernel_registry.Register(std::move(info))); + } + } + return Status::OK(); +} + +} // namespace webgpu +} // namespace contrib +} // namespace onnxruntime diff --git a/onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.h b/onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.h new file mode 100644 index 0000000000000..d73859de78239 --- /dev/null +++ b/onnxruntime/contrib_ops/webgpu/webgpu_contrib_kernels.h @@ -0,0 +1,20 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/framework/kernel_registry.h" + +namespace onnxruntime { +namespace contrib { +namespace webgpu { + +// forward declaration for this EP's namespace. 
+template +KernelCreateInfo BuildKernelCreateInfo(); + +Status RegisterWebGpuContribKernels(KernelRegistry& kernel_registry); + +} // namespace webgpu +} // namespace contrib +} // namespace onnxruntime diff --git a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm index 00e691a8f9fd3..6abbe76a7f151 100644 --- a/onnxruntime/core/platform/apple/logging/apple_log_sink.mm +++ b/onnxruntime/core/platform/apple/logging/apple_log_sink.mm @@ -7,8 +7,6 @@ #include -#include "date/date.h" - namespace onnxruntime { namespace logging { diff --git a/onnxruntime/core/providers/get_execution_providers.cc b/onnxruntime/core/providers/get_execution_providers.cc index 61c035bc29ed5..d2a72c3a38b03 100644 --- a/onnxruntime/core/providers/get_execution_providers.cc +++ b/onnxruntime/core/providers/get_execution_providers.cc @@ -162,6 +162,14 @@ constexpr ProviderInfo kProvidersInPriorityOrder[] = true, #else false, +#endif + }, + { + kWebGpuExecutionProvider, +#ifdef USE_WEBGPU + true, +#else + false, #endif }, { diff --git a/onnxruntime/core/providers/provider_factory_creators.h b/onnxruntime/core/providers/provider_factory_creators.h index da5de83a29c11..25d02a48c13c5 100644 --- a/onnxruntime/core/providers/provider_factory_creators.h +++ b/onnxruntime/core/providers/provider_factory_creators.h @@ -95,6 +95,10 @@ #include "core/providers/webnn/webnn_provider_factory_creator.h" #endif +#if defined(USE_WEBGPU) +#include "core/providers/webgpu/webgpu_provider_factory_creator.h" +#endif + #if defined(USE_CANN) #include "core/providers/cann/cann_provider_factory_creator.h" #endif diff --git a/onnxruntime/core/providers/webgpu/symbols.txt b/onnxruntime/core/providers/webgpu/symbols.txt new file mode 100644 index 0000000000000..e69de29bb2d1d diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc new file mode 100644 index 
0000000000000..00ebdd5583d2e --- /dev/null +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -0,0 +1,108 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/webgpu/webgpu_execution_provider.h" + +#include +#include +#include +#include +#include + +#ifndef DISABLE_CONTRIB_OPS +#include "contrib_ops/webgpu/webgpu_contrib_kernels.h" +#endif + +#include "core/framework/compute_capability.h" +#include "core/framework/data_transfer_manager.h" +#include "core/framework/fallback_cpu_capability.h" +#include "core/framework/kernel_registry.h" +#include "core/graph/function_utils.h" +#include "core/graph/indexed_sub_graph.h" + +namespace onnxruntime { + +namespace webgpu { +template <> +KernelCreateInfo BuildKernelCreateInfo() { + KernelCreateInfo info; + return info; +} + +class Memcpy final : public OpKernel { + public: + Memcpy(const OpKernelInfo& info) : OpKernel(info) {} + + Status Compute(OpKernelContext* ctx) const override { + const auto* X = ctx->Input(0); + Tensor* Y = ctx->Output(0, X->Shape()); + return Info().GetDataTransferManager().CopyTensor(*X, *Y); + } +}; + +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, MemcpyFromHost); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, MemcpyToHost); + +ONNX_OPERATOR_KERNEL_EX( + MemcpyFromHost, + kOnnxDomain, + 1, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()) + .InputMemoryType(OrtMemTypeCPU, 0) + .ExecQueueId(0) + .TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes()), + Memcpy); + +ONNX_OPERATOR_KERNEL_EX( + MemcpyToHost, + kOnnxDomain, + 1, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()) + .OutputMemoryType(OrtMemTypeCPU, 0) + .ExecQueueId(1) + .TypeConstraint("T", DataTypeImpl::AllFixedSizeTensorTypes()), + Memcpy); + +std::unique_ptr RegisterKernels() { + auto kernel_registry = std::make_unique(); + + static const 
BuildKernelCreateInfoFn function_table[] = { + BuildKernelCreateInfo, // default entry to avoid the list becoming empty after ops-reducing + BuildKernelCreateInfo, + BuildKernelCreateInfo, + }; + + for (auto& function_table_entry : function_table) { + KernelCreateInfo info = function_table_entry(); + if (info.kernel_def != nullptr) { // filter disabled entries where type is void + ORT_THROW_IF_ERROR(kernel_registry->Register(std::move(info))); + } + } + +#ifndef DISABLE_CONTRIB_OPS + Status status = ::onnxruntime::contrib::webgpu::RegisterWebGpuContribKernels(*kernel_registry); + ORT_ENFORCE(status.IsOK(), "Failed to register WebGPU contrib kernels: " + status.ErrorMessage()); +#endif + + return kernel_registry; +} + +} // namespace webgpu + +using namespace webgpu; + +WebGpuExecutionProvider::WebGpuExecutionProvider() + : IExecutionProvider{kWebGpuExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0)} {} + +std::shared_ptr WebGpuExecutionProvider::GetKernelRegistry() const { + static std::shared_ptr registry = webgpu::RegisterKernels(); + + return registry; +} + +WebGpuExecutionProvider::~WebGpuExecutionProvider() { +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h new file mode 100644 index 0000000000000..537ecb9301f67 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.h @@ -0,0 +1,37 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Copyright (c) 2019, NXP Semiconductor, Inc. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include "core/framework/execution_provider.h" +#include "core/framework/session_options.h" +#include "core/graph/constants.h" +#include "core/providers/providers.h" + +namespace onnxruntime { +namespace webgpu { + +// forward declaration for this EP's namespace. 
+template +KernelCreateInfo BuildKernelCreateInfo(); + +} // namespace webgpu + +class WebGpuExecutionProvider : public IExecutionProvider { + public: + WebGpuExecutionProvider(); + ~WebGpuExecutionProvider() override; + + std::shared_ptr GetKernelRegistry() const override; + + DataLayout GetPreferredLayout() const override { return DataLayout::NHWC; } + + FusionStyle GetFusionStyle() const override { return FusionStyle::FilteredGraphViewer; } + + // WebGPU EP disallows concurrent runs because the actual implementation (e.g. the WebGPU backend) relies on global + // state, and concurrent runs with async functions may corrupt that state and cause undefined behavior. + bool ConcurrentRunSupported() const override { return false; } +}; + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc b/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc new file mode 100644 index 0000000000000..1a1f1a438c750 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/webgpu_provider_factory.cc @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License.
+ +#include + +#include "core/framework/error_code_helper.h" +#include "core/providers/webgpu/webgpu_provider_factory_creator.h" +#include "core/providers/webgpu/webgpu_execution_provider.h" + +namespace onnxruntime { + +struct WebGpuProviderFactory : IExecutionProviderFactory { + WebGpuProviderFactory() {} + + std::unique_ptr CreateProvider() override { + return std::make_unique(); + } +}; + +std::shared_ptr WebGpuProviderFactoryCreator::Create(const ConfigOptions&) { + return std::make_shared(); +} + +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/webgpu/webgpu_provider_factory_creator.h b/onnxruntime/core/providers/webgpu/webgpu_provider_factory_creator.h new file mode 100644 index 0000000000000..6257a85d45760 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/webgpu_provider_factory_creator.h @@ -0,0 +1,18 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#pragma once + +#include + +#include "core/framework/provider_options.h" +#include "core/providers/providers.h" + +namespace onnxruntime { +struct ConfigOptions; + +struct WebGpuProviderFactoryCreator { + static std::shared_ptr Create(const ConfigOptions& config_options); +}; + +} // namespace onnxruntime diff --git a/onnxruntime/core/session/inference_session.cc b/onnxruntime/core/session/inference_session.cc index 83e7596d2f6b8..76d34aabab6cb 100644 --- a/onnxruntime/core/session/inference_session.cc +++ b/onnxruntime/core/session/inference_session.cc @@ -759,12 +759,12 @@ common::Status InferenceSession::RegisterExecutionProvider(const std::shared_ptr // Some session option values (default or user provided) may not work with some EPs. // Rather than put the onus on the user to know these, make the appropriate change while logging the change. - if (provider_type == onnxruntime::kDmlExecutionProvider) { - // DML's memory is not byte addressable and hence mem pattern doesn't work. 
+ if (provider_type == onnxruntime::kDmlExecutionProvider || provider_type == onnxruntime::kWebGpuExecutionProvider) { + // DML and WebGPU memory is not byte addressable and hence mem pattern doesn't work. if (session_options_.enable_mem_pattern) { LOGS(*session_logger_, INFO) - << "Having memory pattern enabled is not supported while using the DML Execution Provider. " - << "So disabling it for this session since it uses the DML Execution Provider."; + << "Having memory pattern enabled is not supported while using " << provider_type << ". " + << "So disabling it for this session since it uses " << provider_type << "."; session_options_.enable_mem_pattern = false; } diff --git a/onnxruntime/core/session/provider_registration.cc b/onnxruntime/core/session/provider_registration.cc index 68aeea9f3c4d2..8c512c561ea8c 100644 --- a/onnxruntime/core/session/provider_registration.cc +++ b/onnxruntime/core/session/provider_registration.cc @@ -131,6 +131,12 @@ ORT_API_STATUS_IMPL(OrtApis::SessionOptionsAppendExecutionProvider, options->provider_factories.push_back(WebNNProviderFactoryCreator::Create(provider_options)); #else status = create_not_supported_status(); +#endif + } else if (strcmp(provider_name, "WebGPU") == 0) { +#if defined(USE_WEBGPU) + options->provider_factories.push_back(WebGpuProviderFactoryCreator::Create(options->value.config_options)); +#else + status = create_not_supported_status(); #endif } else if (strcmp(provider_name, "AZURE") == 0) { #if defined(USE_AZURE) diff --git a/onnxruntime/python/onnxruntime_pybind_state.cc b/onnxruntime/python/onnxruntime_pybind_state.cc index cce88db1e61e3..3062738eefcf2 100644 --- a/onnxruntime/python/onnxruntime_pybind_state.cc +++ b/onnxruntime/python/onnxruntime_pybind_state.cc @@ -1239,6 +1239,10 @@ std::unique_ptr CreateExecutionProviderInstance( return onnxruntime::XnnpackProviderFactoryCreator::Create( cit == provider_options_map.end() ? 
ProviderOptions{} : cit->second, &session_options) ->CreateProvider(); +#endif + } else if (type == kWebGpuExecutionProvider) { +#if defined(USE_WEBGPU) + return onnxruntime::WebGpuProviderFactoryCreator::Create(session_options.config_options)->CreateProvider(); #endif } else if (type == kCannExecutionProvider) { #ifdef USE_CANN diff --git a/onnxruntime/test/onnx/main.cc b/onnxruntime/test/onnx/main.cc index e8c948ade1068..6d86e4c35af85 100644 --- a/onnxruntime/test/onnx/main.cc +++ b/onnxruntime/test/onnx/main.cc @@ -48,13 +48,16 @@ void usage() { "\t-v: verbose\n" "\t-n [test_case_name]: Specifies a single test case to run.\n" "\t-e [EXECUTION_PROVIDER]: EXECUTION_PROVIDER could be 'cpu', 'cuda', 'dnnl', 'tensorrt', 'vsinpu'" - "'openvino', 'rocm', 'migraphx', 'acl', 'armnn', 'xnnpack', 'nnapi', 'qnn', 'snpe' or 'coreml'. " + "'openvino', 'rocm', 'migraphx', 'acl', 'armnn', 'xnnpack', 'webgpu', 'nnapi', 'qnn', 'snpe' or 'coreml'. " "Default: 'cpu'.\n" "\t-p: Pause after launch, can attach debugger and continue\n" "\t-x: Use parallel executor, default (without -x): sequential executor.\n" "\t-d [device_id]: Specifies the device id for multi-device (e.g. GPU). The value should > 0\n" "\t-t: Specify custom relative tolerance values for output value comparison. default: 1e-5\n" "\t-a: Specify custom absolute tolerance values for output value comparison. default: 1e-5\n" + "\t-C: Specify session configuration entries as key-value pairs: -C \"| |\" \n" + "\t Refer to onnxruntime_session_options_config_keys.h for valid keys and values. \n" + "\t [Example] -C \"session.disable_cpu_ep_fallback|1 ep.context_enable|1\" \n" "\t-i: Specify EP specific runtime options as key value pairs. Different runtime options available are: \n" "\t [QNN only] [backend_path]: QNN backend path. 
e.g '/folderpath/libQnnHtp.so', '/folderpath/libQnnCpu.so'.\n" "\t [QNN only] [profiling_level]: QNN profiling level, options: 'basic', 'detailed', default 'off'.\n" @@ -124,6 +127,39 @@ static TestTolerances LoadTestTolerances(bool enable_cuda, bool enable_openvino, overrides_json["atol_default"], overrides_json["rtol_default"], absolute_overrides, relative_overrides); } +static bool ParseSessionConfigs(const std::string& configs_string, + std::unordered_map& session_configs) { + std::istringstream ss(configs_string); + std::string token; + + while (ss >> token) { + if (token == "") { + continue; + } + + std::string_view token_sv(token); + + auto pos = token_sv.find("|"); + if (pos == std::string_view::npos || pos == 0 || pos == token_sv.length()) { + // Error: must use a '|' to separate the key and value for session configuration entries. + return false; + } + + std::string key(token_sv.substr(0, pos)); + std::string value(token_sv.substr(pos + 1)); + + auto it = session_configs.find(key); + if (it != session_configs.end()) { + // Error: specified duplicate session configuration entry: {key} + return false; + } + + session_configs.insert(std::make_pair(std::move(key), std::move(value))); + } + + return true; +} + #ifdef _WIN32 int GetNumCpuCores() { SYSTEM_LOGICAL_PROCESSOR_INFORMATION buffer[256]; @@ -180,6 +216,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { bool enable_armnn = false; bool enable_rocm = false; bool enable_migraphx = false; + bool enable_webgpu = false; bool enable_xnnpack = false; bool override_tolerance = false; double atol = 1e-5; @@ -189,6 +226,7 @@ int real_main(int argc, char* argv[], Ort::Env& env) { bool user_graph_optimization_level_set = false; bool set_denormal_as_zero = false; std::basic_string ep_runtime_config_string; + std::unordered_map session_config_entries; std::string provider_name = "cpu"; OrtLoggingLevel logging_level = ORT_LOGGING_LEVEL_ERROR; @@ -199,7 +237,7 @@ int real_main(int argc, char* argv[], Ort::Env& 
env) { bool pause = false; { int ch; - while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:t:a:xvo:d:i:pzfb"))) != -1) { + while ((ch = getopt(argc, argv, ORT_TSTR("Ac:hj:Mn:r:e:t:a:xvo:d:C:i:pzfb"))) != -1) { switch (ch) { case 'A': enable_cpu_mem_arena = false; @@ -268,6 +306,8 @@ int real_main(int argc, char* argv[], Ort::Env& env) { enable_rocm = true; } else if (!CompareCString(optarg, ORT_TSTR("migraphx"))) { enable_migraphx = true; + } else if (!CompareCString(optarg, ORT_TSTR("webgpu"))) { + enable_webgpu = true; } else if (!CompareCString(optarg, ORT_TSTR("xnnpack"))) { enable_xnnpack = true; } else { @@ -324,6 +364,11 @@ int real_main(int argc, char* argv[], Ort::Env& env) { return -1; } break; + case 'C': + if (!ParseSessionConfigs(ToUTF8String(optarg), session_config_entries)) { + return -1; + } + break; case 'i': ep_runtime_config_string = optarg; break; @@ -410,6 +455,10 @@ int real_main(int argc, char* argv[], Ort::Env& env) { if (disable_ep_context_embed_mode) sf.AddConfigEntry(kOrtSessionOptionEpContextEmbedMode, "0"); + for (auto& it : session_config_entries) { + sf.AddConfigEntry(it.first.c_str(), it.second.c_str()); + } + if (enable_tensorrt) { #ifdef USE_TENSORRT Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_Tensorrt(sf, device_id)); @@ -699,6 +748,15 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. 
\n)"); #endif } + if (enable_webgpu) { +#ifdef USE_WEBGPU + sf.AppendExecutionProvider("WebGPU", {}); +#else + fprintf(stderr, "WebGPU is not supported in this build"); + return -1; +#endif + } + if (user_graph_optimization_level_set) { sf.SetGraphOptimizationLevel(graph_optimization_level); } diff --git a/onnxruntime/test/perftest/command_args_parser.cc b/onnxruntime/test/perftest/command_args_parser.cc index 6e811f4596eab..42b73ec384cf5 100644 --- a/onnxruntime/test/perftest/command_args_parser.cc +++ b/onnxruntime/test/perftest/command_args_parser.cc @@ -38,8 +38,8 @@ namespace perftest { "\t-A: Disable memory arena\n" "\t-I: Generate tensor input binding (Free dimensions are treated as 1.)\n" "\t-c [parallel runs]: Specifies the (max) number of runs to invoke simultaneously. Default:1.\n" - "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " - "'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack' or 'vitisai'. " + "\t-e [cpu|cuda|dnnl|tensorrt|openvino|dml|acl|nnapi|coreml|qnn|snpe|rocm|migraphx|xnnpack|vitisai|webgpu]: Specifies the provider 'cpu','cuda','dnnl','tensorrt', " + "'openvino', 'dml', 'acl', 'nnapi', 'coreml', 'qnn', 'snpe', 'rocm', 'migraphx', 'xnnpack', 'vitisai' or 'webgpu'. " "Default:'cpu'.\n" "\t-b [tf|ort]: backend to use. 
Default:ort\n" "\t-r [repeated_times]: Specifies the repeated times if running in 'times' test mode.Default:1000.\n" @@ -282,6 +282,8 @@ static bool ParseSessionConfigs(const std::string& configs_string, test_config.machine_config.provider_type_name = onnxruntime::kXnnpackExecutionProvider; } else if (!CompareCString(optarg, ORT_TSTR("vitisai"))) { test_config.machine_config.provider_type_name = onnxruntime::kVitisAIExecutionProvider; + } else if (!CompareCString(optarg, ORT_TSTR("webgpu"))) { + test_config.machine_config.provider_type_name = onnxruntime::kWebGpuExecutionProvider; } else { return false; } diff --git a/onnxruntime/test/perftest/ort_test_session.cc b/onnxruntime/test/perftest/ort_test_session.cc index 3ed5eaee5a5f7..eb230ac771e13 100644 --- a/onnxruntime/test/perftest/ort_test_session.cc +++ b/onnxruntime/test/perftest/ort_test_session.cc @@ -593,6 +593,13 @@ select from 'TF8', 'TF16', 'UINT8', 'FLOAT', 'ITENSOR'. \n)"); "XNNPACK", {{"intra_op_num_threads", std::to_string(performance_test_config.run_config.intra_op_num_threads)}}); #else ORT_THROW("Xnnpack is not supported in this build\n"); +#endif + } else if (provider_name_ == onnxruntime::kWebGpuExecutionProvider) { +#ifdef USE_WEBGPU + session_options.AppendExecutionProvider( + "WebGPU", {{"intra_op_num_threads", std::to_string(performance_test_config.run_config.intra_op_num_threads)}}); +#else + ORT_THROW("WebGPU is not supported in this build\n"); #endif } else if (provider_name_ == onnxruntime::kVitisAIExecutionProvider) { #ifdef USE_VITISAI diff --git a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm index d145a00b1348f..32b4b32e299d6 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm +++ 
b/onnxruntime/test/platform/apple/apple_package_test/ios_package_testUITests/ios_package_uitest_cpp_api.mm @@ -13,15 +13,19 @@ #if __has_include() #define COREML_EP_AVAILABLE 1 +#include #else #define COREML_EP_AVAILABLE 0 #endif -#if COREML_EP_AVAILABLE -#include +#if __has_include() +#define WEBGPU_EP_AVAILABLE 1 +// WebGPU EP doesn't require including the header as it's enabled via AppendExecutionProvider +#else +#define WEBGPU_EP_AVAILABLE 0 #endif -void testSigmoid(const char* modelPath, bool useCoreML) { +void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = false) { // This is an e2e test for ORT C++ API Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "testCppAPI"); @@ -38,6 +42,12 @@ void testSigmoid(const char* modelPath, bool useCoreML) { (void)useCoreML; #endif + if (useWebGPU) { + std::unordered_map provider_options; + // set provider options if needed. e.g. deviceId + session_options.AppendExecutionProvider("WebGPU", provider_options); + } + Ort::Session session(env, modelPath, session_options); size_t input_tensor_size = 3 * 4 * 5; @@ -96,7 +106,7 @@ - (NSString*)getFilePath { } - (void)testCppAPI_Basic { - testSigmoid([self getFilePath].UTF8String, false /* useCoreML */); + testSigmoid([self getFilePath].UTF8String); } #if COREML_EP_AVAILABLE @@ -105,4 +115,9 @@ - (void)testCppAPI_Basic_CoreML { } #endif +#if WEBGPU_EP_AVAILABLE +- (void)testCppAPI_Basic_WebGPU { + testSigmoid([self getFilePath].UTF8String, false /* useCoreML */, true /* useWebGPU */); +} +#endif @end diff --git a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm index 613c6e545939f..86001b6cb50a5 100644 --- a/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm +++ 
b/onnxruntime/test/platform/apple/apple_package_test/macos_package_testUITests/macos_package_uitest_cpp_api.mm @@ -13,15 +13,19 @@ #if __has_include() #define COREML_EP_AVAILABLE 1 +#include #else #define COREML_EP_AVAILABLE 0 #endif -#if COREML_EP_AVAILABLE -#include +#if __has_include() +#define WEBGPU_EP_AVAILABLE 1 +// WebGPU EP doesn't require including the header as it's enabled via AppendExecutionProvider +#else +#define WEBGPU_EP_AVAILABLE 0 #endif -void testSigmoid(const char* modelPath, bool useCoreML) { +void testSigmoid(const char* modelPath, bool useCoreML = false, bool useWebGPU = false) { // This is an e2e test for ORT C++ API Ort::Env env(ORT_LOGGING_LEVEL_WARNING, "testCppAPI"); @@ -38,6 +42,12 @@ void testSigmoid(const char* modelPath, bool useCoreML) { (void)useCoreML; #endif + if (useWebGPU) { + std::unordered_map provider_options; + // set provider options if needed. e.g. deviceId + session_options.AppendExecutionProvider("WebGPU", provider_options); + } + Ort::Session session(env, modelPath, session_options); size_t input_tensor_size = 3 * 4 * 5; @@ -96,7 +106,7 @@ - (NSString*)getFilePath { } - (void)testCppAPI_Basic { - testSigmoid([self getFilePath].UTF8String, false /* useCoreML */); + testSigmoid([self getFilePath].UTF8String); } #if COREML_EP_AVAILABLE @@ -105,4 +115,10 @@ - (void)testCppAPI_Basic_CoreML { } #endif +#if WEBGPU_EP_AVAILABLE +- (void)testCppAPI_Basic_WebGPU { + testSigmoid([self getFilePath].UTF8String, false /* useCoreML */, true /* useWebGPU */); +} +#endif + @end diff --git a/onnxruntime/test/providers/base_tester.cc b/onnxruntime/test/providers/base_tester.cc index 9625b67be79c0..9a17e1b610017 100644 --- a/onnxruntime/test/providers/base_tester.cc +++ b/onnxruntime/test/providers/base_tester.cc @@ -657,6 +657,7 @@ void BaseTester::RunWithConfig(size_t* number_of_pre_packed_weights_counter, kQnnExecutionProvider, kSnpeExecutionProvider, kXnnpackExecutionProvider, + kWebGpuExecutionProvider, }; // need to special case 
any synthetic EP names in the exclude list @@ -712,6 +713,8 @@ void BaseTester::RunWithConfig(size_t* number_of_pre_packed_weights_counter, execution_provider = DefaultXnnpackExecutionProvider(); else if (provider_type == onnxruntime::kDmlExecutionProvider) execution_provider = DefaultDmlExecutionProvider(); + else if (provider_type == onnxruntime::kWebGpuExecutionProvider) + execution_provider = DefaultWebGpuExecutionProvider(); else if (provider_type == onnxruntime::kNvDmlExecutionProvider) execution_provider = DefaultNvDmlExecutionProvider(); diff --git a/onnxruntime/test/providers/compare_provider_test_utils.cc b/onnxruntime/test/providers/compare_provider_test_utils.cc index 3ef74259e27b6..386a5656d8a01 100644 --- a/onnxruntime/test/providers/compare_provider_test_utils.cc +++ b/onnxruntime/test/providers/compare_provider_test_utils.cc @@ -36,6 +36,8 @@ std::unique_ptr GetExecutionProvider(const std::string& prov execution_provider = DefaultRocmExecutionProvider(); else if (provider_type == onnxruntime::kDmlExecutionProvider) execution_provider = DefaultDmlExecutionProvider(); + else if (provider_type == onnxruntime::kWebGpuExecutionProvider) + execution_provider = DefaultWebGpuExecutionProvider(); // skip if execution provider is disabled if (execution_provider == nullptr) { return nullptr; diff --git a/onnxruntime/test/util/default_providers.cc b/onnxruntime/test/util/default_providers.cc index edc6bda12b5f7..e4cff3acfa08d 100644 --- a/onnxruntime/test/util/default_providers.cc +++ b/onnxruntime/test/util/default_providers.cc @@ -302,6 +302,15 @@ std::unique_ptr DefaultXnnpackExecutionProvider() { #endif } +std::unique_ptr DefaultWebGpuExecutionProvider() { +#ifdef USE_WEBGPU + ConfigOptions config_options{}; + return WebGpuProviderFactoryCreator::Create(config_options)->CreateProvider(); +#else + return nullptr; +#endif +} + std::unique_ptr DefaultCannExecutionProvider() { #ifdef USE_CANN OrtCANNProviderOptions provider_options{}; diff --git 
a/onnxruntime/test/util/include/default_providers.h b/onnxruntime/test/util/include/default_providers.h index 8ba573cfe0f54..6a6a9c89bd5ae 100644 --- a/onnxruntime/test/util/include/default_providers.h +++ b/onnxruntime/test/util/include/default_providers.h @@ -63,6 +63,7 @@ std::unique_ptr DefaultQnnExecutionProvider(); std::unique_ptr QnnExecutionProviderWithOptions(const ProviderOptions& options, const SessionOptions* session_options = nullptr); std::unique_ptr DefaultXnnpackExecutionProvider(); +std::unique_ptr DefaultWebGpuExecutionProvider(); std::unique_ptr DefaultCannExecutionProvider(); std::unique_ptr DefaultDmlExecutionProvider(); std::unique_ptr DefaultNvDmlExecutionProvider(); diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index 7cc0655532c4d..b734902a6ec05 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -571,6 +571,7 @@ def convert_arg_line_to_args(self, arg_line): "--nnapi_min_api", type=int, help="Minimum Android API level to enable NNAPI, should be no less than 27" ) parser.add_argument("--use_jsep", action="store_true", help="Build with JavaScript kernels.") + parser.add_argument("--use_webgpu", action="store_true", help="Build with WebGPU support.") parser.add_argument("--use_qnn", action="store_true", help="Build with QNN support.") parser.add_argument("--qnn_home", help="Path to QNN SDK dir.") parser.add_argument("--use_rknpu", action="store_true", help="Build with RKNPU.") @@ -1058,6 +1059,7 @@ def generate_build_tree( "-Donnxruntime_ARMNN_RELU_USE_CPU=" + ("OFF" if args.armnn_relu else "ON"), "-Donnxruntime_ARMNN_BN_USE_CPU=" + ("OFF" if args.armnn_bn else "ON"), "-Donnxruntime_USE_JSEP=" + ("ON" if args.use_jsep else "OFF"), + "-Donnxruntime_USE_WEBGPU=" + ("ON" if args.use_webgpu else "OFF"), # Training related flags "-Donnxruntime_ENABLE_NVTX_PROFILE=" + ("ON" if args.enable_nvtx_profile else "OFF"), "-Donnxruntime_ENABLE_TRAINING=" + ("ON" if args.enable_training else "OFF"), @@ -1317,6 +1319,9 @@ def 
generate_build_tree( raise BuildError("WebNN is only available for WebAssembly build.") cmake_args += ["-Donnxruntime_USE_WEBNN=ON"] + if args.use_jsep and args.use_webgpu: + raise BuildError("JSEP (--use_jsep) and WebGPU (--use_webgpu) cannot be enabled at the same time.") + if args.use_snpe: cmake_args += ["-Donnxruntime_USE_SNPE=ON"] diff --git a/tools/ci_build/gen_def.py b/tools/ci_build/gen_def.py index c4add6f0e8910..2b7790ec4e683 100755 --- a/tools/ci_build/gen_def.py +++ b/tools/ci_build/gen_def.py @@ -80,6 +80,7 @@ def parse_arguments(): "dnnl", "tensorrt", "azure", + "webgpu", ): file.write(f"#include \n") file.write("void* GetFunctionEntryByName(const char* name){\n") diff --git a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py index 71aeb9e7b0304..dd037c17ae3b3 100755 --- a/tools/ci_build/github/apple/build_and_assemble_apple_pods.py +++ b/tools/ci_build/github/apple/build_and_assemble_apple_pods.py @@ -133,6 +133,8 @@ def main(): str(build_dir / "framework_out"), "--variant", package_variant.name, + "--test_project_stage_dir", # use a specific directory so it's easier to debug + str(build_dir / "test_apple_packages_staging"), ] run(test_apple_packages_args) diff --git a/tools/ci_build/github/azure-pipelines/win-gpu-webgpu-ci-pipeline.yml b/tools/ci_build/github/azure-pipelines/win-gpu-webgpu-ci-pipeline.yml new file mode 100644 index 0000000000000..c4db7735aaf2f --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/win-gpu-webgpu-ci-pipeline.yml @@ -0,0 +1,58 @@ +##### start trigger Don't edit it manually, Please do edit set-trigger-rules.py #### +### please do rerun set-trigger-rules.py ### +trigger: + branches: + include: + - main + - rel-* + paths: + exclude: + - docs/** + - README.md + - CONTRIBUTING.md + - BUILD.md + - 'js/web' + - 'onnxruntime/core/providers/js' +pr: + branches: + include: + - main + - rel-* + paths: + exclude: + - docs/** + - README.md + - 
CONTRIBUTING.md + - BUILD.md + - 'js/web' + - 'onnxruntime/core/providers/js' +#### end trigger #### + +parameters: +- name: RunOnnxRuntimeTests + displayName: Run Tests? + type: boolean + default: true + +stages: +- stage: webgpu + dependsOn: [] + jobs: + - template: templates/jobs/win-ci-vs-2022-job.yml + parameters: + BuildConfig: 'RelWithDebInfo' + EnvSetupScript: setup_env_cuda.bat + buildArch: x64 + # add --enable_pybind and --build_java if necessary + additionalBuildFlags: >- + --build_nodejs + --use_webgpu + --cmake_extra_defines onnxruntime_BUILD_UNIT_TESTS=ON + msbuildPlatform: x64 + isX86: false + job_name_suffix: x64_RelWithDebInfo + RunOnnxRuntimeTests: ${{ parameters.RunOnnxRuntimeTests }} + ORT_EP_NAME: WebGPU + EnablePython: false + WITH_CACHE: true + MachinePool: onnxruntime-Win2022-VS2022-webgpu-A10 diff --git a/tools/ci_build/set-trigger-rules.py b/tools/ci_build/set-trigger-rules.py index fb6aa44cdf31a..0e9cd514d8aa5 100644 --- a/tools/ci_build/set-trigger-rules.py +++ b/tools/ci_build/set-trigger-rules.py @@ -40,6 +40,7 @@ "win-gpu-training-ci-pipeline.yml", "win-gpu-doc-gen-ci-pipeline.yml", "win-gpu-tensorrt-ci-pipeline.yml", + "win-gpu-webgpu-ci-pipeline.yml", "win-qnn-arm64-ci-pipeline.yml", "win-qnn-ci-pipeline.yml", ]