Enable QNN HTP support for Node (#20576)
### Description
Add support for using ONNX Runtime's QNN HTP execution provider from Node.js.

### Motivation and Context
ONNX Runtime supports the QNN HTP execution provider, but does not expose it to Node.js. This change adds baseline support for using it from the Node.js binding.

Note that this does not update the officially distributed Node packages; it simply patches onnxruntime.dll so that 'qnn' can be requested as an execution provider.

Testing was done using the existing onnxruntime-node package. The `onnxruntime.dll` and `onnxruntime_binding.node` in `node_modules\onnxruntime-node\bin\napi-v3\win32\arm64` were swapped out for the newly built versions, and the various QNN DLLs and .so files were placed next to `onnxruntime.dll`. Testing was performed on a variety of models and applications, but the easiest test is to modify the [node quickstart example](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/js/quick-start_onnxruntime-node).
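
With the swapped-in binaries, session creation looks like the minimal sketch below (the model path is a placeholder; `'qnn'` is the provider name this change enables):

```ts
import * as ort from 'onnxruntime-node';

async function main() {
  // 'qnn' is the execution provider name added by this change
  const session = await ort.InferenceSession.create('./model.onnx', {
    executionProviders: ['qnn'],
  });
  console.log('inputs:', session.inputNames, 'outputs:', session.outputNames);
}

main().catch(console.error);
```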
joncamp authored May 9, 2024
1 parent d1cbb3e commit 768c793
Showing 7 changed files with 34 additions and 1 deletion.
6 changes: 6 additions & 0 deletions cmake/onnxruntime_common.cmake
@@ -71,6 +71,12 @@ if(onnxruntime_target_platform STREQUAL "ARM64EC")
  endif()
endif()

if(onnxruntime_target_platform STREQUAL "ARM64")
  if (MSVC)
    add_compile_options("/bigobj")
  endif()
endif()

file(GLOB onnxruntime_common_src CONFIGURE_DEPENDS
  ${onnxruntime_common_src_patterns}
)
5 changes: 4 additions & 1 deletion cmake/onnxruntime_nodejs.cmake
@@ -73,6 +73,9 @@ endif()
if (onnxruntime_USE_COREML)
  set(NODEJS_BINDING_USE_COREML "--use_coreml")
endif()
if (onnxruntime_USE_QNN)
  set(NODEJS_BINDING_USE_QNN "--use_qnn")
endif()

if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
  # add custom target
@@ -90,7 +93,7 @@ add_custom_target(nodejs_binding_wrapper ALL
  COMMAND ${NPM_CLI} ci
  COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
          --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
          ${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
  WORKING_DIRECTORY ${JS_NODE_ROOT}
  COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")

5 changes: 5 additions & 0 deletions js/common/lib/inference-session.ts
@@ -201,6 +201,7 @@ export declare namespace InferenceSession {
    webgl: WebGLExecutionProviderOption;
    webgpu: WebGpuExecutionProviderOption;
    webnn: WebNNExecutionProviderOption;
    qnn: QnnExecutionProviderOption;
    xnnpack: XnnpackExecutionProviderOption;
  }

@@ -247,6 +248,10 @@ export declare namespace InferenceSession {
    numThreads?: number;
    powerPreference?: 'default'|'low-power'|'high-performance';
  }
  export interface QnnExecutionProviderOption extends ExecutionProviderOption {
    readonly name: 'qnn';
    // TODO add flags
  }
  export interface CoreMLExecutionProviderOption extends ExecutionProviderOption {
    readonly name: 'coreml';
    /**
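
Until flags are added, the typed object form of the new option carries only the provider name; the native side pins the QNN settings (see session_options_helper.cc below). A minimal sketch:

```ts
import * as ort from 'onnxruntime-node';

const options: ort.InferenceSession.SessionOptions = {
  // QnnExecutionProviderOption currently has no fields besides `name`
  executionProviders: [{ name: 'qnn' }],
};
// pass to ort.InferenceSession.create(modelPath, options) as usual
```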
4 changes: 4 additions & 0 deletions js/node/CMakeLists.txt
@@ -37,6 +37,7 @@ option(USE_DML "Build with DirectML support" OFF)
option(USE_CUDA "Build with CUDA support" OFF)
option(USE_TENSORRT "Build with TensorRT support" OFF)
option(USE_COREML "Build with CoreML support" OFF)
option(USE_QNN "Build with QNN support" OFF)

if(USE_DML)
  add_compile_definitions(USE_DML=1)
@@ -50,6 +51,9 @@ endif()
if(USE_COREML)
  add_compile_definitions(USE_COREML=1)
endif()
if(USE_QNN)
  add_compile_definitions(USE_QNN=1)
endif()

# source files
file(GLOB ORT_NODEJS_BINDING_SOURCE_FILES ${CMAKE_SOURCE_DIR}/src/*.cc)
5 changes: 5 additions & 0 deletions js/node/script/build.ts
@@ -35,6 +35,8 @@ const USE_CUDA = !!buildArgs.use_cuda;
const USE_TENSORRT = !!buildArgs.use_tensorrt;
// --use_coreml
const USE_COREML = !!buildArgs.use_coreml;
// --use_qnn
const USE_QNN = !!buildArgs.use_qnn;

// build path
const ROOT_FOLDER = path.join(__dirname, '..');
@@ -72,6 +74,9 @@ if (USE_TENSORRT) {
if (USE_COREML) {
  args.push('--CDUSE_COREML=ON');
}
if (USE_QNN) {
  args.push('--CDUSE_QNN=ON');
}

// set CMAKE_OSX_ARCHITECTURES for macOS build
if (os.platform() === 'darwin') {
3 changes: 3 additions & 0 deletions js/node/src/inference_session_wrap.cc
@@ -252,6 +252,9 @@ Napi::Value InferenceSessionWrap::ListSupportedBackends(const Napi::CallbackInfo
#ifdef USE_COREML
  result.Set(result.Length(), createObject("coreml", true));
#endif
#ifdef USE_QNN
  result.Set(result.Length(), createObject("qnn", true));
#endif

  return scope.Escape(result);
}
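
The new entry is also reported by the binding's backend list. A hedged sketch of inspecting it — loading the .node file directly and the `bundled` property name are assumptions based on the code above:

```ts
// Path mirrors the layout described in the PR testing notes (an assumption)
const binding = require('onnxruntime-node/bin/napi-v3/win32/arm64/onnxruntime_binding.node');
for (const backend of binding.listSupportedBackends()) {
  // createObject(name, flag) above suggests { name, bundled }-shaped entries
  console.log(backend.name, backend.bundled);
}
```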
7 changes: 7 additions & 0 deletions js/node/src/session_options_helper.cc
@@ -80,6 +80,13 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions &sess
#ifdef USE_COREML
    } else if (name == "coreml") {
      Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(sessionOptions, coreMlFlags));
#endif
#ifdef USE_QNN
    } else if (name == "qnn") {
      std::unordered_map<std::string, std::string> qnn_options;
      qnn_options["backend_path"] = "QnnHtp.dll";
      qnn_options["enable_htp_fp16_precision"] = "1";
      sessionOptions.AppendExecutionProvider("QNN", qnn_options);
#endif
    } else {
      ORT_NAPI_THROW_ERROR(epList.Env(), "Invalid argument: sessionOptions.executionProviders[", i,
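
Conversely, in a build compiled without `USE_QNN`, the `'qnn'` string falls through to the final `else` branch and surfaces as an invalid-argument error in JavaScript — a sketch of guarding for that:

```ts
import * as ort from 'onnxruntime-node';

(async () => {
  try {
    await ort.InferenceSession.create('./model.onnx', { executionProviders: ['qnn'] });
  } catch (e) {
    // Without USE_QNN, the else branch above throws via ORT_NAPI_THROW_ERROR
    console.error('QNN execution provider not available in this build:', e);
  }
})();
```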
