Skip to content

Commit

Permalink
Initial WebGPU EP checkin (#22318)
Browse files Browse the repository at this point in the history
### Description

This change introduces the WebGPU EP into ONNX Runtime.

To make the PR as simple as possible, this PR excluded the following:
- C API changes for WebGPU EP
- actual implementation of WebGPU EP. Currently in this PR, WebGPU is a
stub implementation that does not register any kernel.
- Python IO Binding update
- Node.js IO Binding update

This PR now contains only 43 file changes (while the working branch
contains 130+) and hopefully this makes it easier to review.

There is going to be separated PRs for each mentioned above.

Current working branch: #21904
  • Loading branch information
fs-eire authored Oct 8, 2024
1 parent bc84958 commit c5d28ca
Show file tree
Hide file tree
Showing 41 changed files with 749 additions and 57 deletions.
22 changes: 22 additions & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -146,6 +146,7 @@ option(onnxruntime_TVM_USE_LLVM "Build TVM with LLVM. Set customized path to llv
option(onnxruntime_TVM_USE_HASH "Build ipp-crypto library for support hash algorithm. It is defined for TVM only")
option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternative math library on ARM, WebAssembly and x86." OFF)
option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)

# Options related to reducing the binary size produced by the build
# XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
Expand Down Expand Up @@ -488,6 +489,22 @@ if (onnxruntime_BUILD_CSHARP)
endif()
endif()

if (onnxruntime_BUILD_OBJC)
check_language(OBJC)
if(CMAKE_OBJC_COMPILER)
enable_language(OBJC)
else()
message(FATAL_ERROR "Objective-C is not supported.")
endif()

check_language(OBJCXX)
if(CMAKE_OBJCXX_COMPILER)
enable_language(OBJCXX)
else()
message(FATAL_ERROR "Objective-C++ is not supported.")
endif()
endif()

if (NOT WIN32)
#TODO: On Linux we may try https://github.com/microsoft/TraceLogging.git
if (onnxruntime_ENABLE_INSTRUMENT)
Expand Down Expand Up @@ -910,6 +927,11 @@ if (onnxruntime_USE_WEBNN)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBNN=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES webnn)
endif()
if (onnxruntime_USE_WEBGPU)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_WEBGPU=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_WEBGPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES webgpu)
endif()
if (onnxruntime_USE_CANN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_CANN=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_CANN=1)
Expand Down
82 changes: 66 additions & 16 deletions cmake/external/onnxruntime_external_deps.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -576,10 +576,11 @@ if (onnxruntime_USE_MIMALLOC)
onnxruntime_fetchcontent_makeavailable(mimalloc)
endif()

#onnxruntime_EXTERNAL_LIBRARIES could contain onnx, onnx_proto,libprotobuf, cuda/cudnn,
# dnnl/mklml, onnxruntime_codegen_tvm, tvm and pthread
# pthread is always at the last
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date ${ONNXRUNTIME_CLOG_TARGET_NAME})
set(onnxruntime_EXTERNAL_LIBRARIES ${onnxruntime_EXTERNAL_LIBRARIES_XNNPACK} ${WIL_TARGET} nlohmann_json::nlohmann_json
onnx onnx_proto ${PROTOBUF_LIB} re2::re2 Boost::mp11 safeint_interface
flatbuffers::flatbuffers ${GSL_TARGET} ${ABSEIL_LIBS} date::date
${ONNXRUNTIME_CLOG_TARGET_NAME})

# The source code of onnx_proto is generated, we must build this lib first before starting to compile the other source code that uses ONNX protobuf types.
# The other libs do not have the problem. All the sources are already there. We can compile them in any order.
set(onnxruntime_EXTERNAL_DEPENDENCIES onnx_proto flatbuffers::flatbuffers)
Expand Down Expand Up @@ -634,24 +635,73 @@ if (onnxruntime_USE_COREML)
FetchContent_Populate(coremltools)
endif()

message(STATUS "Finished fetching external dependencies")
if (onnxruntime_USE_WEBGPU)
FetchContent_Declare(
dawn
URL ${DEP_URL_dawn}
URL_HASH SHA1=${DEP_SHA1_dawn}
PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
)

set(onnxruntime_LINK_DIRS )
# use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size
set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE)
set(DAWN_BUILD_SAMPLES OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_INSTALL OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_NULL OFF CACHE BOOL "" FORCE)
set(DAWN_FETCH_DEPENDENCIES ON CACHE BOOL "" FORCE)

# disable things we don't use
set(DAWN_DXC_ENABLE_ASSERTS_IN_NDEBUG OFF)
set(DAWN_ENABLE_DESKTOP_GL OFF CACHE BOOL "" FORCE)
set(DAWN_ENABLE_OPENGLES OFF CACHE BOOL "" FORCE)
set(DAWN_SUPPORTS_GLFW_FOR_WINDOWING OFF CACHE BOOL "" FORCE)
set(DAWN_USE_GLFW OFF CACHE BOOL "" FORCE)
set(DAWN_USE_WINDOWS_UI OFF CACHE BOOL "" FORCE)
set(DAWN_USE_X11 OFF CACHE BOOL "" FORCE)

set(TINT_BUILD_TESTS OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_CMD_TOOLS OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_GLSL_WRITER OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_GLSL_VALIDATOR OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_IR_BINARY OFF CACHE BOOL "" FORCE)
set(TINT_BUILD_SPV_READER OFF CACHE BOOL "" FORCE) # don't need. disabling is a large binary size saving
set(TINT_BUILD_WGSL_WRITER ON CACHE BOOL "" FORCE) # needed to create cache key. runtime error if not enabled.

# SPIR-V validation shouldn't be required given we're using Tint to create the SPIR-V.
set(DAWN_ENABLE_SPIRV_VALIDATION OFF CACHE BOOL "" FORCE)

if (WIN32)
# building this requires the HLSL writer to be enabled in Tint. TBD if that we need either of these to be ON.
set(DAWN_USE_BUILT_DXC ON CACHE BOOL "" FORCE)
set(TINT_BUILD_HLSL_WRITER ON CACHE BOOL "" FORCE)

# Vulkan may optionally be included in a Windows build. Exclude until we have an explicit use case that requires it.
set(DAWN_ENABLE_VULKAN OFF CACHE BOOL "" FORCE)
endif()

onnxruntime_fetchcontent_makeavailable(dawn)

list(APPEND onnxruntime_EXTERNAL_LIBRARIES dawn::dawn_native dawn::dawn_proc)
endif()

set(onnxruntime_LINK_DIRS)
if (onnxruntime_USE_CUDA)
find_package(CUDAToolkit REQUIRED)
find_package(CUDAToolkit REQUIRED)

if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()
include(cuDNN)
if(onnxruntime_CUDNN_HOME)
file(TO_CMAKE_PATH ${onnxruntime_CUDNN_HOME} onnxruntime_CUDNN_HOME)
set(CUDNN_PATH ${onnxruntime_CUDNN_HOME})
endif()

include(cuDNN)
endif()

if(onnxruntime_USE_SNPE)
include(external/find_snpe.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
include(external/find_snpe.cmake)
list(APPEND onnxruntime_EXTERNAL_LIBRARIES ${SNPE_NN_LIBS})
endif()

FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)
FILE(TO_NATIVE_PATH ${CMAKE_BINARY_DIR} ORT_BINARY_DIR)
FILE(TO_NATIVE_PATH ${PROJECT_SOURCE_DIR} ORT_SOURCE_DIR)

message(STATUS "Finished fetching external dependencies")
69 changes: 62 additions & 7 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -38,7 +38,7 @@ function(get_c_cxx_api_headers HEADERS_VAR)

# need to add header files for enabled EPs
foreach(f ${ONNXRUNTIME_PROVIDER_NAMES})
# The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory
# The header files in include/onnxruntime/core/providers/cuda directory cannot be flattened to the same directory
# with onnxruntime_c_api.h . Most other EPs probably also do not work in this way.
if((NOT f STREQUAL cuda) AND (NOT f STREQUAL rocm))
file(GLOB _provider_headers CONFIGURE_DEPENDS
Expand Down Expand Up @@ -89,10 +89,22 @@ elseif(onnxruntime_BUILD_APPLE_FRAMEWORK)
# create Info.plist for the framework and podspec for CocoaPods (optional)
set(MACOSX_FRAMEWORK_NAME "onnxruntime")
set(MACOSX_FRAMEWORK_IDENTIFIER "com.microsoft.onnxruntime")
# Need to include CoreML as a weaklink for CocoaPods package if the EP is enabled

# Setup weak frameworks for macOS/iOS. 'weak' as the CoreML or WebGPU EPs are optionally enabled.
if(onnxruntime_USE_COREML)
set(APPLE_WEAK_FRAMEWORK "\\\"CoreML\\\"")
list(APPEND _weak_frameworks "\\\"CoreML\\\"")
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND _weak_frameworks "\\\"QuartzCore\\\"")
list(APPEND _weak_frameworks "\\\"IOSurface\\\"")
list(APPEND _weak_frameworks "\\\"Metal\\\"")
endif()

if (_weak_frameworks)
string(JOIN ", " APPLE_WEAK_FRAMEWORK ${_weak_frameworks})
endif()

set(INFO_PLIST_PATH "${CMAKE_CURRENT_BINARY_DIR}/Info.plist")
configure_file(${REPO_ROOT}/cmake/Info.plist.in ${INFO_PLIST_PATH})
configure_file(
Expand Down Expand Up @@ -200,6 +212,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
${PROVIDERS_RKNPU}
${PROVIDERS_VSINPU}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBGPU}
${PROVIDERS_WEBNN}
${PROVIDERS_AZURE}
${PROVIDERS_INTERNAL_TESTING}
Expand Down Expand Up @@ -364,16 +377,58 @@ if(onnxruntime_BUILD_APPLE_FRAMEWORK)
endif()
endforeach()

# helper function that recurses to also handle static library dependencies of the ORT external libraries
set(_processed_libs) # keep track of processed libraries to skip any duplicate dependencies
function(add_symlink_for_static_lib_and_dependencies lib)
function(process cur_target)
# de-alias if applicable so a consistent target name is used
get_target_property(alias ${cur_target} ALIASED_TARGET)
if(TARGET ${alias})
set(cur_target ${alias})
endif()

if(${cur_target} IN_LIST _processed_libs OR ${cur_target} IN_LIST lib_and_dependencies)
return()
endif()

list(APPEND lib_and_dependencies ${cur_target})

get_target_property(link_libraries ${cur_target} LINK_LIBRARIES)
foreach(dependency ${link_libraries})
if(TARGET ${dependency})
process(${dependency})
endif()
endforeach()

set(lib_and_dependencies ${lib_and_dependencies} PARENT_SCOPE)
endfunction()

set(lib_and_dependencies)
process(${lib})

foreach(_target ${lib_and_dependencies})
get_target_property(type ${_target} TYPE)
if(${type} STREQUAL "STATIC_LIBRARY")
# message(STATUS "Adding symlink for ${_target}")
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
$<TARGET_FILE:${_target}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_target}>)
endif()
endforeach()

list(APPEND _processed_libs ${lib_and_dependencies})
set(_processed_libs ${_processed_libs} PARENT_SCOPE)
endfunction()

# for external libraries we create a symlink to the .a file
foreach(_LIB ${onnxruntime_EXTERNAL_LIBRARIES})
if(NOT TARGET ${_LIB}) # if we didn't build from source. it may not a target
if(NOT TARGET ${_LIB}) # if we didn't build from source it may not be a target
continue()
endif()

GET_TARGET_PROPERTY(_LIB_TYPE ${_LIB} TYPE)
if(_LIB_TYPE STREQUAL "STATIC_LIBRARY")
add_custom_command(TARGET onnxruntime POST_BUILD
COMMAND ${CMAKE_COMMAND} -E create_symlink
$<TARGET_FILE:${_LIB}> ${STATIC_LIB_DIR}/$<TARGET_LINKER_FILE_NAME:${_LIB}>)
add_symlink_for_static_lib_and_dependencies(${_LIB})
endif()
endforeach()

Expand Down
5 changes: 4 additions & 1 deletion cmake/onnxruntime_nodejs.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -67,6 +67,9 @@ endif()
if (onnxruntime_USE_DML)
set(NODEJS_BINDING_USE_DML "--use_dml")
endif()
if (onnxruntime_USE_WEBGPU)
set(NODEJS_BINDING_USE_WEBGPU "--use_webgpu")
endif()
if (onnxruntime_USE_TENSORRT)
set(NODEJS_BINDING_USE_TENSORRT "--use_tensorrt")
endif()
Expand All @@ -92,7 +95,7 @@ add_custom_target(js_common_npm_ci ALL
add_custom_target(nodejs_binding_wrapper ALL
COMMAND ${NPM_CLI} ci
COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
--arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_WEBGPU} ${NODEJS_BINDING_USE_TENSORRT}
${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
WORKING_DIRECTORY ${JS_NODE_ROOT}
COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")
Expand Down
14 changes: 0 additions & 14 deletions cmake/onnxruntime_objectivec.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -9,20 +9,6 @@ if(NOT onnxruntime_BUILD_SHARED_LIB)
message(FATAL_ERROR "The Objective-C API requires onnxruntime_BUILD_SHARED_LIB to be enabled.")
endif()

check_language(OBJC)
if(CMAKE_OBJC_COMPILER)
enable_language(OBJC)
else()
message(FATAL_ERROR "Objective-C is not supported.")
endif()

check_language(OBJCXX)
if(CMAKE_OBJCXX_COMPILER)
enable_language(OBJCXX)
else()
message(FATAL_ERROR "Objective-C++ is not supported.")
endif()

add_compile_options(
"$<$<COMPILE_LANGUAGE:OBJC,OBJCXX>:-Wall>"
"$<$<COMPILE_LANGUAGE:OBJC,OBJCXX>:-Wextra>")
Expand Down
7 changes: 7 additions & 0 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -110,6 +110,9 @@ endif()
if(onnxruntime_USE_WEBNN)
set(PROVIDERS_WEBNN onnxruntime_providers_webnn)
endif()
if(onnxruntime_USE_WEBGPU)
set(PROVIDERS_WEBGPU onnxruntime_providers_webgpu)
endif()
if (onnxruntime_USE_CANN)
set(PROVIDERS_CANN onnxruntime_providers_cann)
endif()
Expand Down Expand Up @@ -151,6 +154,10 @@ if (onnxruntime_USE_WEBNN)
include(onnxruntime_providers_webnn.cmake)
endif()

if (onnxruntime_USE_WEBGPU)
include(onnxruntime_providers_webgpu.cmake)
endif()

if (onnxruntime_USE_NNAPI_BUILTIN)
include(onnxruntime_providers_nnapi.cmake)
endif()
Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_providers_cpu.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -40,6 +40,11 @@ file(GLOB_RECURSE onnxruntime_js_contrib_ops_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/js/*.cc"
)

file(GLOB_RECURSE onnxruntime_webgpu_contrib_ops_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.h"
"${ONNXRUNTIME_ROOT}/contrib_ops/webgpu/*.cc"
)

file(GLOB onnxruntime_providers_common_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/*.cc"
Expand Down
27 changes: 27 additions & 0 deletions cmake/onnxruntime_providers_webgpu.cmake
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Copyright (c) Microsoft Corporation. All rights reserved.
# Licensed under the MIT License.

if (onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD)
message(FATAL_ERROR "WebGPU EP can not be used in a basic minimal build. Please build with '--minimal_build extended'")
endif()

add_compile_definitions(USE_WEBGPU=1)
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
add_definitions(-DENABLE_WEBASSEMBLY_THREADS=1)
endif()
file(GLOB_RECURSE onnxruntime_providers_webgpu_cc_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/webgpu/*.cc"
)
if(NOT onnxruntime_DISABLE_CONTRIB_OPS)
source_group(TREE ${ONNXRUNTIME_ROOT} FILES ${onnxruntime_webgpu_contrib_ops_cc_srcs})
list(APPEND onnxruntime_providers_webgpu_cc_srcs ${onnxruntime_webgpu_contrib_ops_cc_srcs})
endif()

source_group(TREE ${REPO_ROOT} FILES ${onnxruntime_providers_webgpu_cc_srcs})
onnxruntime_add_static_library(onnxruntime_providers_webgpu ${onnxruntime_providers_webgpu_cc_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_webgpu
onnxruntime_common dawn::dawncpp_headers dawn::dawn_headers onnx onnx_proto flatbuffers::flatbuffers Boost::mp11 safeint_interface)
target_link_libraries(onnxruntime_providers_webgpu dawn::dawn_native dawn::dawn_proc)

set_target_properties(onnxruntime_providers_webgpu PROPERTIES FOLDER "ONNXRuntime")
1 change: 1 addition & 0 deletions cmake/onnxruntime_python.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -178,6 +178,7 @@ target_link_libraries(onnxruntime_pybind11_state PRIVATE
${PROVIDERS_ACL}
${PROVIDERS_ARMNN}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBGPU}
${PROVIDERS_AZURE}
${PROVIDERS_QNN}
onnxruntime_optimizer
Expand Down
12 changes: 12 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -562,6 +562,10 @@ if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
endif()

if(onnxruntime_USE_RKNPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_rknpu)
endif()
Expand Down Expand Up @@ -603,6 +607,7 @@ set(ONNXRUNTIME_TEST_LIBS
${PROVIDERS_NNAPI}
${PROVIDERS_VSINPU}
${PROVIDERS_JS}
${PROVIDERS_WEBGPU}
${PROVIDERS_QNN}
${PROVIDERS_SNPE}
${PROVIDERS_RKNPU}
Expand Down Expand Up @@ -664,6 +669,13 @@ if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_js)
endif()

if(onnxruntime_USE_WEBGPU)
list(APPEND onnxruntime_test_framework_src_patterns ${TEST_SRC_DIR}/providers/webgpu/*)
list(APPEND onnxruntime_test_framework_libs onnxruntime_providers_webgpu)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_webgpu)
list(APPEND onnxruntime_test_providers_libs onnxruntime_providers_webgpu)
endif()

# QNN EP tests require CPU EP op implementations for accuracy evaluation, so disable on minimal
# or reduced op builds.
if(onnxruntime_USE_QNN AND NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_REDUCED_OPS_BUILD)
Expand Down
Loading

0 comments on commit c5d28ca

Please sign in to comment.