From 768c79317c41b066e5bf78ff617c91477ba17fd1 Mon Sep 17 00:00:00 2001
From: Jon Campbell
Date: Thu, 9 May 2024 13:11:07 -0700
Subject: [PATCH] Enable QNN HTP support for Node (#20576)

### Description
Add support for using ONNX Runtime with the QNN execution provider from Node.js.

### Motivation and Context
ONNX Runtime supports the QNN HTP backend, but that support is not exposed to Node.js. This change adds baseline support for using ONNX Runtime with QNN from Node. Note that it does not update the officially distributed Node.js packages; it simply patches `onnxruntime.dll` so that `'qnn'` can be used as an execution provider.

Testing was done using the existing onnxruntime-node package. The newly built `onnxruntime.dll` and `onnxruntime_binding.node` were swapped into `node_modules\onnxruntime-node\bin\napi-v3\win32\arm64`, and the various QNN DLLs and `.so` files were placed next to `onnxruntime.dll`. Testing was performed on a variety of models and applications, but the easiest test is to modify the [node quickstart example](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/js/quick-start_onnxruntime-node) (a hypothetical usage sketch is appended after the diff below).

---
 cmake/onnxruntime_common.cmake        | 6 ++++++
 cmake/onnxruntime_nodejs.cmake        | 5 ++++-
 js/common/lib/inference-session.ts    | 5 +++++
 js/node/CMakeLists.txt                | 4 ++++
 js/node/script/build.ts               | 5 +++++
 js/node/src/inference_session_wrap.cc | 3 +++
 js/node/src/session_options_helper.cc | 7 +++++++
 7 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake
index 69d8f5fa138c7..896379d743441 100644
--- a/cmake/onnxruntime_common.cmake
+++ b/cmake/onnxruntime_common.cmake
@@ -71,6 +71,12 @@ if(onnxruntime_target_platform STREQUAL "ARM64EC")
   endif()
 endif()
 
+if(onnxruntime_target_platform STREQUAL "ARM64")
+  if (MSVC)
+    add_compile_options("/bigobj")
+  endif()
+endif()
+
 file(GLOB onnxruntime_common_src CONFIGURE_DEPENDS
   ${onnxruntime_common_src_patterns}
 )
diff --git a/cmake/onnxruntime_nodejs.cmake b/cmake/onnxruntime_nodejs.cmake
index 555baac6f1a51..f11928c11cf14 100644
--- a/cmake/onnxruntime_nodejs.cmake
+++ b/cmake/onnxruntime_nodejs.cmake
@@ -73,6 +73,9 @@ endif()
 if (onnxruntime_USE_COREML)
   set(NODEJS_BINDING_USE_COREML "--use_coreml")
 endif()
+if (onnxruntime_USE_QNN)
+  set(NODEJS_BINDING_USE_QNN "--use_qnn")
+endif()
 
 if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
 # add custom target
@@ -90,7 +93,7 @@ add_custom_target(nodejs_binding_wrapper ALL
                   COMMAND ${NPM_CLI} ci
                   COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
                           --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
-                          ${NODEJS_BINDING_USE_COREML}
+                          ${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
                   WORKING_DIRECTORY ${JS_NODE_ROOT}
                   COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")
diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts
index 14db5c59d972a..353d93bbc34ae 100644
--- a/js/common/lib/inference-session.ts
+++ b/js/common/lib/inference-session.ts
@@ -201,6 +201,7 @@ export declare namespace InferenceSession {
     webgl: WebGLExecutionProviderOption;
     webgpu: WebGpuExecutionProviderOption;
     webnn: WebNNExecutionProviderOption;
+    qnn: QnnExecutionProviderOption;
     xnnpack: XnnpackExecutionProviderOption;
   }
 
@@ -247,6 +248,10 @@ export declare namespace InferenceSession {
     numThreads?: number;
     powerPreference?: 'default'|'low-power'|'high-performance';
   }
+  export interface QnnExecutionProviderOption extends ExecutionProviderOption {
+    readonly name: 'qnn';
+    // TODO add flags
+  }
   export interface CoreMLExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'coreml';
     /**
diff --git a/js/node/CMakeLists.txt b/js/node/CMakeLists.txt
index 8157df288eeb9..5c32f62f3a802 100644
--- a/js/node/CMakeLists.txt
+++ b/js/node/CMakeLists.txt
@@ -37,6 +37,7 @@ option(USE_DML "Build with DirectML support" OFF)
 option(USE_CUDA "Build with CUDA support" OFF)
 option(USE_TENSORRT "Build with TensorRT support" OFF)
 option(USE_COREML "Build with CoreML support" OFF)
+option(USE_QNN "Build with QNN support" OFF)
 
 if(USE_DML)
   add_compile_definitions(USE_DML=1)
@@ -50,6 +51,9 @@ endif()
 if(USE_COREML)
   add_compile_definitions(USE_COREML=1)
 endif()
+if(USE_QNN)
+  add_compile_definitions(USE_QNN=1)
+endif()
 
 # source files
 file(GLOB ORT_NODEJS_BINDING_SOURCE_FILES ${CMAKE_SOURCE_DIR}/src/*.cc)
diff --git a/js/node/script/build.ts b/js/node/script/build.ts
index cc59507179085..3f0f804ed368e 100644
--- a/js/node/script/build.ts
+++ b/js/node/script/build.ts
@@ -35,6 +35,8 @@ const USE_CUDA = !!buildArgs.use_cuda;
 const USE_TENSORRT = !!buildArgs.use_tensorrt;
 // --use_coreml
 const USE_COREML = !!buildArgs.use_coreml;
+// --use_qnn
+const USE_QNN = !!buildArgs.use_qnn;
 
 // build path
 const ROOT_FOLDER = path.join(__dirname, '..');
@@ -72,6 +74,9 @@ if (USE_TENSORRT) {
 if (USE_COREML) {
   args.push('--CDUSE_COREML=ON');
 }
+if (USE_QNN) {
+  args.push('--CDUSE_QNN=ON');
+}
 
 // set CMAKE_OSX_ARCHITECTURES for macOS build
 if (os.platform() === 'darwin') {
diff --git a/js/node/src/inference_session_wrap.cc b/js/node/src/inference_session_wrap.cc
index 1bbb6df1ce1c8..b85104cadc6ed 100644
--- a/js/node/src/inference_session_wrap.cc
+++ b/js/node/src/inference_session_wrap.cc
@@ -252,6 +252,9 @@ Napi::Value InferenceSessionWrap::ListSupportedBackends(const Napi::CallbackInfo
 #ifdef USE_COREML
   result.Set(result.Length(), createObject("coreml", true));
 #endif
+#ifdef USE_QNN
+  result.Set(result.Length(), createObject("qnn", true));
+#endif
 
   return scope.Escape(result);
 }
diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc
index a0de832d87fe5..46e08010b7835 100644
--- a/js/node/src/session_options_helper.cc
+++ b/js/node/src/session_options_helper.cc
@@ -80,6 +80,13 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions &sess
 #ifdef USE_COREML
   } else if (name == "coreml") {
     Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(sessionOptions, coreMlFlags));
+#endif
+#ifdef USE_QNN
+  } else if (name == "qnn") {
+    std::unordered_map<std::string, std::string> qnn_options;
+    qnn_options["backend_path"] = "QnnHtp.dll";
+    qnn_options["enable_htp_fp16_precision"] = "1";
+    sessionOptions.AppendExecutionProvider("QNN", qnn_options);
 #endif
   } else {
     ORT_NAPI_THROW_ERROR(epList.Env(), "Invalid argument: sessionOptions.executionProviders[", i,
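For reference, here is a minimal sketch of the quickstart-style smoke test described above. It assumes the rebuilt `onnxruntime.dll` and `onnxruntime_binding.node` have already been swapped into `node_modules\onnxruntime-node\bin\napi-v3\win32\arm64` and the QNN libraries placed next to `onnxruntime.dll`, as described in the PR text; the model path `model.onnx`, the input name `input`, and the shape are placeholders for whatever model is being tested, not part of this PR:

```ts
// Hypothetical smoke test for the 'qnn' execution provider added by this patch.
// Requires a binding built with --use_qnn; model path, input name, and dims
// below are placeholders.
import {InferenceSession, Tensor} from 'onnxruntime-node';

async function main(): Promise<void> {
  // 'qnn' is the execution provider name this patch registers. The binding
  // hard-codes backend_path (QnnHtp.dll) and enable_htp_fp16_precision, so no
  // provider flags are passed here.
  const session = await InferenceSession.create('model.onnx', {
    executionProviders: ['qnn'],
  });

  console.log('inputs:', session.inputNames, 'outputs:', session.outputNames);

  // Placeholder feed: adjust the name and dims to match the model under test.
  const dims = [1, 3, 224, 224];
  const size = dims.reduce((a, b) => a * b, 1);
  const data = Float32Array.from({length: size}, () => Math.random());
  const feeds = {input: new Tensor('float32', data, dims)};

  const results = await session.run(feeds);
  console.log('output names:', Object.keys(results));
}

main().catch((e) => {
  console.error(e);
  process.exit(1);
});
```

Because `session_options_helper.cc` fills in `backend_path = "QnnHtp.dll"` and `enable_htp_fp16_precision = "1"` itself, the new `QnnExecutionProviderOption` interface currently exposes no flags (see the TODO in `inference-session.ts`), so passing the plain string `'qnn'` and the object `{name: 'qnn'}` are equivalent.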