From 768c79317c41b066e5bf78ff617c91477ba17fd1 Mon Sep 17 00:00:00 2001
From: Jon Campbell
Date: Thu, 9 May 2024 13:11:07 -0700
Subject: [PATCH] Enable QNN HTP support for Node (#20576)

### Description
Add support for using ONNX Runtime with the QNN execution provider from Node.js.

### Motivation and Context
ONNX Runtime supports the QNN HTP backend, but that support is not exposed to Node.js. This change adds baseline support for using ONNX Runtime with QNN from Node. Note that it does not update the officially distributed Node.js packages; it simply patches `onnxruntime.dll` so that `'qnn'` can be used as an execution provider.

Testing was done using the existing onnxruntime-node package. The newly built `onnxruntime.dll` and `onnxruntime_binding.node` were swapped into `node_modules\onnxruntime-node\bin\napi-v3\win32\arm64`, and the various QNN DLLs and `.so` files were placed next to `onnxruntime.dll`. Testing was performed on a variety of models and applications, but the easiest test is to modify the [node quickstart example](https://github.com/microsoft/onnxruntime-inference-examples/tree/main/js/quick-start_onnxruntime-node) (a hypothetical usage sketch is appended after the diff below).

---
 cmake/onnxruntime_common.cmake        | 6 ++++++
 cmake/onnxruntime_nodejs.cmake        | 5 ++++-
 js/common/lib/inference-session.ts    | 5 +++++
 js/node/CMakeLists.txt                | 4 ++++
 js/node/script/build.ts               | 5 +++++
 js/node/src/inference_session_wrap.cc | 3 +++
 js/node/src/session_options_helper.cc | 7 +++++++
 7 files changed, 34 insertions(+), 1 deletion(-)

diff --git a/cmake/onnxruntime_common.cmake b/cmake/onnxruntime_common.cmake
index 69d8f5fa138c7..896379d743441 100644
--- a/cmake/onnxruntime_common.cmake
+++ b/cmake/onnxruntime_common.cmake
@@ -71,6 +71,12 @@ if(onnxruntime_target_platform STREQUAL "ARM64EC")
   endif()
 endif()
 
+if(onnxruntime_target_platform STREQUAL "ARM64")
+  if (MSVC)
+    add_compile_options("/bigobj")
+  endif()
+endif()
+
 file(GLOB onnxruntime_common_src CONFIGURE_DEPENDS
   ${onnxruntime_common_src_patterns}
 )
diff --git a/cmake/onnxruntime_nodejs.cmake b/cmake/onnxruntime_nodejs.cmake
index 555baac6f1a51..f11928c11cf14 100644
--- a/cmake/onnxruntime_nodejs.cmake
+++ b/cmake/onnxruntime_nodejs.cmake
@@ -73,6 +73,9 @@ endif()
 if (onnxruntime_USE_COREML)
   set(NODEJS_BINDING_USE_COREML "--use_coreml")
 endif()
+if (onnxruntime_USE_QNN)
+  set(NODEJS_BINDING_USE_QNN "--use_qnn")
+endif()
 
 if(NOT onnxruntime_ENABLE_STATIC_ANALYSIS)
 # add custom target
@@ -90,7 +93,7 @@ add_custom_target(nodejs_binding_wrapper ALL
                   COMMAND ${NPM_CLI} ci
                   COMMAND ${NPM_CLI} run build -- --onnxruntime-build-dir=${CMAKE_CURRENT_BINARY_DIR} --config=${CMAKE_BUILD_TYPE} --onnxruntime-generator=${CMAKE_GENERATOR}
                           --arch=${NODEJS_BINDING_ARCH} ${NODEJS_BINDING_USE_CUDA} ${NODEJS_BINDING_USE_DML} ${NODEJS_BINDING_USE_TENSORRT}
-                          ${NODEJS_BINDING_USE_COREML}
+                          ${NODEJS_BINDING_USE_COREML} ${NODEJS_BINDING_USE_QNN}
                   WORKING_DIRECTORY ${JS_NODE_ROOT}
                   COMMENT "Using cmake-js to build OnnxRuntime Node.js binding")
diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts
index 14db5c59d972a..353d93bbc34ae 100644
--- a/js/common/lib/inference-session.ts
+++ b/js/common/lib/inference-session.ts
@@ -201,6 +201,7 @@ export declare namespace InferenceSession {
     webgl: WebGLExecutionProviderOption;
     webgpu: WebGpuExecutionProviderOption;
     webnn: WebNNExecutionProviderOption;
+    qnn: QnnExecutionProviderOption;
     xnnpack: XnnpackExecutionProviderOption;
   }
 
@@ -247,6 +248,10 @@ export declare namespace InferenceSession {
     numThreads?: number;
     powerPreference?: 'default'|'low-power'|'high-performance';
   }
+  export interface QnnExecutionProviderOption extends ExecutionProviderOption {
+    readonly name: 'qnn';
+    // TODO add flags
+  }
   export interface CoreMLExecutionProviderOption extends ExecutionProviderOption {
     readonly name: 'coreml';
     /**
diff --git a/js/node/CMakeLists.txt b/js/node/CMakeLists.txt
index 8157df288eeb9..5c32f62f3a802 100644
--- a/js/node/CMakeLists.txt
+++ b/js/node/CMakeLists.txt
@@ -37,6 +37,7 @@ option(USE_DML "Build with DirectML support" OFF)
 option(USE_CUDA "Build with CUDA support" OFF)
 option(USE_TENSORRT "Build with TensorRT support" OFF)
 option(USE_COREML "Build with CoreML support" OFF)
+option(USE_QNN "Build with QNN support" OFF)
 
 if(USE_DML)
   add_compile_definitions(USE_DML=1)
@@ -50,6 +51,9 @@ endif()
 if(USE_COREML)
   add_compile_definitions(USE_COREML=1)
 endif()
+if(USE_QNN)
+  add_compile_definitions(USE_QNN=1)
+endif()
 
 # source files
 file(GLOB ORT_NODEJS_BINDING_SOURCE_FILES ${CMAKE_SOURCE_DIR}/src/*.cc)
diff --git a/js/node/script/build.ts b/js/node/script/build.ts
index cc59507179085..3f0f804ed368e 100644
--- a/js/node/script/build.ts
+++ b/js/node/script/build.ts
@@ -35,6 +35,8 @@ const USE_CUDA = !!buildArgs.use_cuda;
 const USE_TENSORRT = !!buildArgs.use_tensorrt;
 // --use_coreml
 const USE_COREML = !!buildArgs.use_coreml;
+// --use_qnn
+const USE_QNN = !!buildArgs.use_qnn;
 
 // build path
 const ROOT_FOLDER = path.join(__dirname, '..');
@@ -72,6 +74,9 @@ if (USE_TENSORRT) {
 if (USE_COREML) {
   args.push('--CDUSE_COREML=ON');
 }
+if (USE_QNN) {
+  args.push('--CDUSE_QNN=ON');
+}
 
 // set CMAKE_OSX_ARCHITECTURES for macOS build
 if (os.platform() === 'darwin') {
diff --git a/js/node/src/inference_session_wrap.cc b/js/node/src/inference_session_wrap.cc
index 1bbb6df1ce1c8..b85104cadc6ed 100644
--- a/js/node/src/inference_session_wrap.cc
+++ b/js/node/src/inference_session_wrap.cc
@@ -252,6 +252,9 @@ Napi::Value InferenceSessionWrap::ListSupportedBackends(const Napi::CallbackInfo
 #ifdef USE_COREML
   result.Set(result.Length(), createObject("coreml", true));
 #endif
+#ifdef USE_QNN
+  result.Set(result.Length(), createObject("qnn", true));
+#endif
 
   return scope.Escape(result);
 }
diff --git a/js/node/src/session_options_helper.cc b/js/node/src/session_options_helper.cc
index a0de832d87fe5..46e08010b7835 100644
--- a/js/node/src/session_options_helper.cc
+++ b/js/node/src/session_options_helper.cc
@@ -80,6 +80,13 @@ void ParseExecutionProviders(const Napi::Array epList, Ort::SessionOptions &sess
 #ifdef USE_COREML
   } else if (name == "coreml") {
     Ort::ThrowOnError(OrtSessionOptionsAppendExecutionProvider_CoreML(sessionOptions, coreMlFlags));
+#endif
+#ifdef USE_QNN
+  } else if (name == "qnn") {
+    std::unordered_map<std::string, std::string> qnn_options;
+    qnn_options["backend_path"] = "QnnHtp.dll";
+    qnn_options["enable_htp_fp16_precision"] = "1";
+    sessionOptions.AppendExecutionProvider("QNN", qnn_options);
 #endif
   } else {
     ORT_NAPI_THROW_ERROR(epList.Env(), "Invalid argument: sessionOptions.executionProviders[", i,
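For reference, here is a minimal sketch of the quickstart-style smoke test described above. It assumes the rebuilt `onnxruntime.dll` and `onnxruntime_binding.node` have already been swapped into `node_modules\onnxruntime-node\bin\napi-v3\win32\arm64` and the QNN libraries placed next to `onnxruntime.dll`, as described in the PR text; the model path `model.onnx`, the input name `input`, and the shape are placeholders for whatever model is being tested, not part of this PR:

```ts
// Hypothetical smoke test for the 'qnn' execution provider added by this patch.
// Requires a binding built with --use_qnn; model path, input name, and dims
// below are placeholders.
import {InferenceSession, Tensor} from 'onnxruntime-node';

async function main(): Promise<void> {
  // 'qnn' is the execution provider name this patch registers. The binding
  // hard-codes backend_path (QnnHtp.dll) and enable_htp_fp16_precision, so no
  // provider flags are passed here.
  const session = await InferenceSession.create('model.onnx', {
    executionProviders: ['qnn'],
  });

  console.log('inputs:', session.inputNames, 'outputs:', session.outputNames);

  // Placeholder feed: adjust the name and dims to match the model under test.
  const dims = [1, 3, 224, 224];
  const size = dims.reduce((a, b) => a * b, 1);
  const data = Float32Array.from({length: size}, () => Math.random());
  const feeds = {input: new Tensor('float32', data, dims)};

  const results = await session.run(feeds);
  console.log('output names:', Object.keys(results));
}

main().catch((e) => {
  console.error(e);
  process.exit(1);
});
```

Because `session_options_helper.cc` fills in `backend_path = "QnnHtp.dll"` and `enable_htp_fp16_precision = "1"` itself, the new `QnnExecutionProviderOption` interface currently exposes no flags (see the TODO in `inference-session.ts`), so passing the plain string `'qnn'` and the object `{name: 'qnn'}` are equivalent.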