diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json
index df27fa5ab1b95..07dff50f9a3bd 100644
--- a/cgmanifests/generated/cgmanifest.json
+++ b/cgmanifests/generated/cgmanifest.json
@@ -346,7 +346,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "511eb80847afe6bded34ec491a38d5d78ba2d604",
+          "commitHash": "12a3b24c456cebd9fd11f23ac0164f78129b00c6",
           "repositoryUrl": "https://github.com/google/dawn.git"
         },
         "comments": "dawn"
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 70ac62954ad6d..7710ab2f4cac7 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -148,6 +148,7 @@ option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternat
 option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
 option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)
 option(onnxruntime_USE_EXTERNAL_DAWN "Build with treating Dawn as external dependency. Will not link Dawn at build time." OFF)
+option(onnxruntime_CUSTOM_DAWN_SRC_PATH "Path to custom Dawn src dir.")
 
 # Options related to reducing the binary size produced by the build
 # XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
diff --git a/cmake/deps.txt b/cmake/deps.txt
index 9cf92bf417fcb..21f9ee1701c46 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -58,5 +58,5 @@ extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d839
 composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
 directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
 cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1
-dawn;https://github.com/google/dawn/archive/511eb80847afe6bded34ec491a38d5d78ba2d604.zip;c493f5aca5586f6634e25d0121c85df71189fb99
+dawn;https://github.com/google/dawn/archive/12a3b24c456cebd9fd11f23ac0164f78129b00c6.zip;ad428f6dc16f1336d584f7bad5714e1097dafc43
 kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/v0.2.0/kleidiai-v0.2.0.zip;B1E3173992FD91F20DB904AB77D6E901778C2681
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
index d9e833a2d8cd4..ee7abcbad025c 100644
--- a/cmake/external/onnxruntime_external_deps.cmake
+++ b/cmake/external/onnxruntime_external_deps.cmake
@@ -615,12 +615,25 @@ if (onnxruntime_USE_COREML)
 endif()
 
 if (onnxruntime_USE_WEBGPU)
-  FetchContent_Declare(
-    dawn
-    URL ${DEP_URL_dawn}
-    URL_HASH SHA1=${DEP_SHA1_dawn}
-    PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
-  )
+  if (onnxruntime_CUSTOM_DAWN_SRC_PATH)
+    # use the custom dawn source path if provided
+    #
+    # specified as:
+    # build.py --use_webgpu --cmake_extra_defines "onnxruntime_CUSTOM_DAWN_SRC_PATH=<dawn source dir>"
+    FetchContent_Declare(
+      dawn
+      SOURCE_DIR ${onnxruntime_CUSTOM_DAWN_SRC_PATH}
+    )
+  else()
+    FetchContent_Declare(
+      dawn
+      URL ${DEP_URL_dawn}
+      URL_HASH SHA1=${DEP_SHA1_dawn}
+      # All previous patches are merged into the upstream dawn project. We don't need to apply any patches right now.
+      # If we need to apply patches in the future, we can uncomment the following line.
+ # PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch + ) + endif() # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch deleted file mode 100644 index 7a2a01d55be46..0000000000000 --- a/cmake/patches/dawn/dawn.patch +++ /dev/null @@ -1,81 +0,0 @@ -diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt -index 9c0bd6fa4e..bf8a57aeac 100644 ---- a/src/dawn/native/CMakeLists.txt -+++ b/src/dawn/native/CMakeLists.txt -@@ -857,6 +857,11 @@ if (DAWN_ENABLE_SWIFTSHADER) - target_compile_definitions(dawn_native PRIVATE "DAWN_ENABLE_SWIFTSHADER") - endif() - -+if (IOS) -+ target_compile_options(dawn_native_objects PRIVATE -fno-objc-arc) -+ target_compile_options(dawn_native PRIVATE -fno-objc-arc) -+endif() -+ - if (DAWN_BUILD_MONOLITHIC_LIBRARY) - ############################################################################### - # Do the 'complete_lib' build. -diff --git a/src/dawn/native/Surface_metal.mm b/src/dawn/native/Surface_metal.mm -index ce55acbd43..2cfd363479 100644 ---- a/src/dawn/native/Surface_metal.mm -+++ b/src/dawn/native/Surface_metal.mm -@@ -33,10 +33,18 @@ - - #import - -+#include "dawn/common/Platform.h" -+ - namespace dawn::native { - - bool InheritsFromCAMetalLayer(void* obj) { -- id object = static_cast(obj); -+ id object = -+#if DAWN_PLATFORM_IS(IOS) -+ (__bridge id)obj; -+#else // DAWN_PLATFORM_IS(IOS) -+ static_cast(obj); -+#endif // DAWN_PLATFORM_IS(IOS) -+ - return [object isKindOfClass:[CAMetalLayer class]]; - } - -diff --git a/src/dawn/native/metal/SharedFenceMTL.mm b/src/dawn/native/metal/SharedFenceMTL.mm -index bde8bfea07..8906185d6f 100644 ---- a/src/dawn/native/metal/SharedFenceMTL.mm -+++ b/src/dawn/native/metal/SharedFenceMTL.mm -@@ -25,6 +25,8 @@ - // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -+#include "dawn/common/Platform.h" -+ - #include "dawn/native/metal/SharedFenceMTL.h" - - #include "dawn/native/ChainUtils.h" -@@ -39,8 +41,13 @@ ResultOrError> SharedFence::Create( - const SharedFenceMTLSharedEventDescriptor* descriptor) { - DAWN_INVALID_IF(descriptor->sharedEvent == nullptr, "MTLSharedEvent is missing."); - if (@available(macOS 10.14, iOS 12.0, *)) { -- return AcquireRef(new SharedFence( -- device, label, static_cast>(descriptor->sharedEvent))); -+ return AcquireRef(new SharedFence(device, label, -+#if DAWN_PLATFORM_IS(IOS) -+ (__bridge id)(descriptor->sharedEvent) -+#else // DAWN_PLATFORM_IS(IOS) -+ static_cast>(descriptor->sharedEvent) -+#endif // DAWN_PLATFORM_IS(IOS) -+ )); - } else { - return DAWN_INTERNAL_ERROR("MTLSharedEvent not supported."); - } -diff --git a/src/tint/api/BUILD.cmake b/src/tint/api/BUILD.cmake -index 0037d83276..6372c4ee77 100644 ---- a/src/tint/api/BUILD.cmake -+++ b/src/tint/api/BUILD.cmake -@@ -57,6 +57,7 @@ tint_target_add_dependencies(tint_api lib - tint_lang_wgsl_ast_transform - tint_lang_wgsl_common - tint_lang_wgsl_features -+ tint_lang_wgsl_inspector - tint_lang_wgsl_program - tint_lang_wgsl_sem - tint_lang_wgsl_writer_ir_to_program diff --git a/dockerfiles/Dockerfile.cuda b/dockerfiles/Dockerfile.cuda index ce4560e9b0c7c..40f11dca623a7 100644 --- a/dockerfiles/Dockerfile.cuda +++ b/dockerfiles/Dockerfile.cuda @@ -48,7 +48,7 @@ RUN cd /code \ && python3 -m venv /code/env \ && . /code/env/bin/activate \ && pip install --upgrade psutil setuptools wheel packaging \ - && pip install -r tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/scripts/requirements.txt \ + && pip install -r /code/tools/ci_build/github/linux/python/requirements.txt \ && python /code/tools/ci_build/build.py --build_dir /code/build/Linux \ --allow_running_as_root --skip_submodule_sync \ --use_cuda --cuda_home /usr/local/cuda \ diff --git a/docs/How_To_Update_ONNX_Dev_Notes.md b/docs/How_To_Update_ONNX_Dev_Notes.md index 4d8a286bde66e..199e6671f6a1a 100644 --- a/docs/How_To_Update_ONNX_Dev_Notes.md +++ b/docs/How_To_Update_ONNX_Dev_Notes.md @@ -21,7 +21,7 @@ This file should be generated. See [cgmanifests/README](/cgmanifests/README.md) - [onnxruntime/test/python/requirements.txt](/onnxruntime/test/python/requirements.txt) - [tools/ci_build/github/linux/docker/scripts/requirements.txt](/tools/ci_build/github/linux/docker/scripts/requirements.txt) - [tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt](/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt) -- [tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/scripts/requirements.txt](/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/scripts/requirements.txt) +- [tools/ci_build/github/linux/python/requirements.txt](/tools/ci_build/github/linux/python/requirements.txt) - Run `git grep -rn "onnx==1" .` to find other locations and update this document if necessary. 1. If there is any change to `cmake/external/onnx/onnx/*.in.proto`, you need to regenerate OnnxMl.cs. 
diff --git a/java/build-android.gradle b/java/build-android.gradle index d5839f9f27869..9c4275b74f626 100644 --- a/java/build-android.gradle +++ b/java/build-android.gradle @@ -82,7 +82,7 @@ allprojects { } android { - compileSdkVersion 32 + compileSdkVersion 34 defaultConfig { minSdkVersion minSdkVer @@ -108,8 +108,8 @@ android { } compileOptions { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } sourceSets { diff --git a/java/build.gradle b/java/build.gradle index 34ac93cce6f4e..845121dd17a48 100644 --- a/java/build.gradle +++ b/java/build.gradle @@ -50,8 +50,8 @@ mavenSettings { } java { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } // This jar tasks serves as a CMAKE signaling diff --git a/java/gradle/wrapper/gradle-wrapper.properties b/java/gradle/wrapper/gradle-wrapper.properties index 4baf5a11d45a3..381baa9cef1ec 100644 --- a/java/gradle/wrapper/gradle-wrapper.properties +++ b/java/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=9631d53cf3e74bfa726893aee1f8994fee4e060c401335946dba2156f440f24c -distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip +distributionSha256Sum=544c35d6bd849ae8a5ed0bcea39ba677dc40f49df7d1835561582da2009b961d +distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/java/gradlew.bat b/java/gradlew.bat index 93e3f59f135dd..25da30dbdeee9 100644 --- a/java/gradlew.bat +++ b/java/gradlew.bat @@ -43,11 +43,11 @@ set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 if %ERRORLEVEL% equ 0 goto execute -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 goto fail @@ -57,11 +57,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe if exist "%JAVA_EXE%" goto execute -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2 goto fail diff --git a/java/src/test/android/app/build.gradle b/java/src/test/android/app/build.gradle index ecbc4b90612dd..baf18e714d25c 100644 --- a/java/src/test/android/app/build.gradle +++ b/java/src/test/android/app/build.gradle @@ -7,12 +7,12 @@ def minSdkVer = System.properties.get("minSdkVer")?:24 def qnnVersion = System.properties['qnnVersion'] android { - compileSdkVersion 32 + compileSdkVersion 34 defaultConfig { applicationId "ai.onnxruntime.example.javavalidator" minSdkVersion minSdkVer - targetSdkVersion 32 + targetSdkVersion 34 versionCode 1 versionName "1.0" @@ -34,11 +34,11 @@ android { } } compileOptions { - sourceCompatibility JavaVersion.VERSION_1_8 - targetCompatibility JavaVersion.VERSION_1_8 + sourceCompatibility JavaVersion.VERSION_17 + targetCompatibility JavaVersion.VERSION_17 } kotlinOptions { - jvmTarget = '1.8' + jvmTarget = '17' } // Conditional packagingOptions for QNN builds only if (qnnVersion != null) { @@ -69,11 +69,11 @@ dependencies { implementation 'com.google.android.material:material:1.3.0' implementation 'androidx.constraintlayout:constraintlayout:2.0.4' testImplementation 'junit:junit:4.+' - androidTestImplementation 'androidx.test.ext:junit:1.1.3' - androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0' + androidTestImplementation "androidx.test.ext:junit:1.1.5" + androidTestImplementation "androidx.test.espresso:espresso-core:3.5.0" - androidTestImplementation 'androidx.test:runner:1.4.0' - androidTestImplementation 'androidx.test:rules:1.4.0' + androidTestImplementation "androidx.test:runner:1.5.2" + androidTestImplementation "androidx.test:rules:1.5.0" androidTestImplementation 'com.microsoft.appcenter:espresso-test-extension:1.4' // dependencies for onnxruntime-android-qnn diff --git a/js/.eslintrc.js b/js/.eslintrc.js index bd1e9061355f5..462e417df1d66 100644 --- a/js/.eslintrc.js +++ b/js/.eslintrc.js @@ -198,19 +198,6 @@ module.exports = { '_OrtReleaseTensor', '_OrtRun', '_OrtRunWithBinding', - '_OrtTrainingCopyParametersFromBuffer', - '_OrtTrainingCopyParametersToBuffer', - '_OrtTrainingCreateSession', - '_OrtTrainingEvalStep', - '_OrtTrainingGetModelInputOutputCount', - '_OrtTrainingGetModelInputOutputName', - '_OrtTrainingGetParametersSize', - '_OrtTrainingLazyResetGrad', - '_OrtTrainingLoadCheckpoint', - '_OrtTrainingOptimizerStep', - '_OrtTrainingReleaseCheckpoint', - '_OrtTrainingReleaseSession', - '_OrtTrainingRunTrainStep', ], }, ], diff --git a/js/common/lib/backend.ts b/js/common/lib/backend.ts index e27e67622aa82..e63f9c6c9147f 100644 --- a/js/common/lib/backend.ts +++ b/js/common/lib/backend.ts @@ -3,7 +3,6 @@ import { InferenceSession } from './inference-session.js'; import { OnnxValue } from './onnx-value.js'; -import { TrainingSession } from './training-session.js'; /** * @ignore @@ -42,33 +41,6 @@ export interface InferenceSessionHandler extends SessionHandler { ): Promise; } -/** - * Represent a handler instance of a training inference session. 
- * - * @ignore - */ -export interface TrainingSessionHandler extends SessionHandler { - readonly evalInputNames: readonly string[]; - readonly evalOutputNames: readonly string[]; - - lazyResetGrad(): Promise; - runTrainStep( - feeds: SessionHandler.FeedsType, - fetches: SessionHandler.FetchesType, - options: InferenceSession.RunOptions, - ): Promise; - runOptimizerStep(options: InferenceSession.RunOptions): Promise; - runEvalStep( - feeds: SessionHandler.FeedsType, - fetches: SessionHandler.FetchesType, - options: InferenceSession.RunOptions, - ): Promise; - - getParametersSize(trainableOnly: boolean): Promise; - loadParametersBuffer(buffer: Uint8Array, trainableOnly: boolean): Promise; - getContiguousParameters(trainableOnly: boolean): Promise; -} - /** * Represent a backend that provides implementation of model inferencing. * @@ -84,14 +56,6 @@ export interface Backend { uriOrBuffer: string | Uint8Array, options?: InferenceSession.SessionOptions, ): Promise; - - createTrainingSessionHandler?( - checkpointStateUriOrBuffer: TrainingSession.UriOrBuffer, - trainModelUriOrBuffer: TrainingSession.UriOrBuffer, - evalModelUriOrBuffer: TrainingSession.UriOrBuffer, - optimizerModelUriOrBuffer: TrainingSession.UriOrBuffer, - options: InferenceSession.SessionOptions, - ): Promise; } export { registerBackend } from './backend-impl.js'; diff --git a/js/common/lib/env.ts b/js/common/lib/env.ts index 642a897a90d26..e70f608ad7030 100644 --- a/js/common/lib/env.ts +++ b/js/common/lib/env.ts @@ -2,6 +2,7 @@ // Licensed under the MIT License. import { env as envImpl } from './env-impl.js'; +import { TryGetGlobalType } from './type-helper.js'; export declare namespace Env { export type WasmPathPrefix = string; @@ -14,7 +15,6 @@ export declare namespace Env { * If not modified, the filename of the .wasm file is: * - `ort-wasm-simd-threaded.wasm` for default build * - `ort-wasm-simd-threaded.jsep.wasm` for JSEP build (with WebGPU and WebNN) - * - `ort-training-wasm-simd-threaded.wasm` for training build */ wasm?: URL | string; /** @@ -25,7 +25,6 @@ export declare namespace Env { * If not modified, the filename of the .mjs file is: * - `ort-wasm-simd-threaded.mjs` for default build * - `ort-wasm-simd-threaded.jsep.mjs` for JSEP build (with WebGPU and WebNN) - * - `ort-training-wasm-simd-threaded.mjs` for training build */ mjs?: URL | string; } @@ -200,22 +199,16 @@ export declare namespace Env { * value will be the GPU adapter that created by the underlying WebGPU backend. * * When use with TypeScript, the type of this property is `GPUAdapter` defined in "@webgpu/types". - * Use `const adapter = env.webgpu.adapter as GPUAdapter;` in TypeScript to access this property with correct type. - * - * see comments on {@link Tensor.GpuBufferType} */ - adapter: unknown; + adapter: TryGetGlobalType<'GPUAdapter'>; /** * Get the device for WebGPU. * * This property is only available after the first WebGPU inference session is created. * * When use with TypeScript, the type of this property is `GPUDevice` defined in "@webgpu/types". - * Use `const device = env.webgpu.device as GPUDevice;` in TypeScript to access this property with correct type. - * - * see comments on {@link Tensor.GpuBufferType} for more details about why not use types defined in "@webgpu/types". */ - readonly device: unknown; + readonly device: TryGetGlobalType<'GPUDevice'>; /** * Set or get whether validate input content. 
diff --git a/js/common/lib/env.ts b/js/common/lib/env.ts
index 642a897a90d26..e70f608ad7030 100644
--- a/js/common/lib/env.ts
+++ b/js/common/lib/env.ts
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 import { env as envImpl } from './env-impl.js';
+import { TryGetGlobalType } from './type-helper.js';
 
 export declare namespace Env {
   export type WasmPathPrefix = string;
@@ -14,7 +15,6 @@ export declare namespace Env {
      * If not modified, the filename of the .wasm file is:
      * - `ort-wasm-simd-threaded.wasm` for default build
      * - `ort-wasm-simd-threaded.jsep.wasm` for JSEP build (with WebGPU and WebNN)
-     * - `ort-training-wasm-simd-threaded.wasm` for training build
      */
     wasm?: URL | string;
     /**
@@ -25,7 +25,6 @@
      * If not modified, the filename of the .mjs file is:
      * - `ort-wasm-simd-threaded.mjs` for default build
      * - `ort-wasm-simd-threaded.jsep.mjs` for JSEP build (with WebGPU and WebNN)
-     * - `ort-training-wasm-simd-threaded.mjs` for training build
      */
     mjs?: URL | string;
   }
@@ -200,22 +199,16 @@
      * value will be the GPU adapter that created by the underlying WebGPU backend.
      *
      * When use with TypeScript, the type of this property is `GPUAdapter` defined in "@webgpu/types".
-     * Use `const adapter = env.webgpu.adapter as GPUAdapter;` in TypeScript to access this property with correct type.
-     *
-     * see comments on {@link Tensor.GpuBufferType}
      */
-    adapter: unknown;
+    adapter: TryGetGlobalType<'GPUAdapter'>;
     /**
      * Get the device for WebGPU.
      *
      * This property is only available after the first WebGPU inference session is created.
      *
      * When use with TypeScript, the type of this property is `GPUDevice` defined in "@webgpu/types".
-     * Use `const device = env.webgpu.device as GPUDevice;` in TypeScript to access this property with correct type.
-     *
-     * see comments on {@link Tensor.GpuBufferType} for more details about why not use types defined in "@webgpu/types".
      */
-    readonly device: unknown;
+    readonly device: TryGetGlobalType<'GPUDevice'>;
     /**
      * Set or get whether validate input content.
      *
diff --git a/js/common/lib/index.ts b/js/common/lib/index.ts
index 3ed56b3c2e812..d75e6a477258d 100644
--- a/js/common/lib/index.ts
+++ b/js/common/lib/index.ts
@@ -26,4 +26,3 @@ export * from './tensor-factory.js';
 export * from './trace.js';
 export * from './onnx-model.js';
 export * from './onnx-value.js';
-export * from './training-session.js';
diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts
index 547db029471a2..e62c6579e8333 100644
--- a/js/common/lib/inference-session.ts
+++ b/js/common/lib/inference-session.ts
@@ -4,6 +4,7 @@
 import { InferenceSession as InferenceSessionImpl } from './inference-session-impl.js';
 import { OnnxModelOptions } from './onnx-model.js';
 import { OnnxValue, OnnxValueDataLocation } from './onnx-value.js';
+import { TryGetGlobalType } from './type-helper.js';
 
 /* eslint-disable @typescript-eslint/no-redeclare */
 
@@ -282,7 +283,7 @@ export declare namespace InferenceSession {
     extends WebNNExecutionProviderName,
       Omit<WebNNContextOptions, 'deviceType'>,
       Required<Pick<WebNNContextOptions, 'deviceType'>> {
-    context: unknown /* MLContext */;
+    context: TryGetGlobalType<'MLContext'>;
   }
 
   /**
@@ -291,8 +292,8 @@
    * @see https://www.w3.org/TR/webnn/#dom-ml-createcontext-gpudevice
    */
   export interface WebNNOptionsWebGpu extends WebNNExecutionProviderName {
-    context: unknown /* MLContext */;
-    gpuDevice: unknown /* GPUDevice */;
+    context: TryGetGlobalType<'MLContext'>;
+    gpuDevice: TryGetGlobalType<'GPUDevice'>;
   }
 
   /**
diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts
index af918705b97e3..05553bd96662b 100644
--- a/js/common/lib/tensor.ts
+++ b/js/common/lib/tensor.ts
@@ -4,6 +4,7 @@
 import { TensorFactory } from './tensor-factory.js';
 import { Tensor as TensorImpl } from './tensor-impl.js';
 import { TypedTensorUtils } from './tensor-utils.js';
+import { TryGetGlobalType } from './type-helper.js';
 
 /* eslint-disable @typescript-eslint/no-redeclare */
 
@@ -131,24 +132,19 @@
    */
   export type TextureDataTypes = 'float32';
 
+  type GpuBufferTypeFallback = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };
   /**
    * type alias for WebGPU buffer
-   *
-   * The reason why we don't use type "GPUBuffer" defined in webgpu.d.ts from @webgpu/types is because "@webgpu/types"
-   * requires "@types/dom-webcodecs" as peer dependency when using TypeScript < v5.1 and its version need to be chosen
-   * carefully according to the TypeScript version being used. This means so far there is not a way to keep every
-   * TypeScript version happy. It turns out that we will easily broke users on some TypeScript version.
-   *
-   * for more info see https://github.com/gpuweb/types/issues/127
    */
-  export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };
+  export type GpuBufferType = TryGetGlobalType<'GPUBuffer', GpuBufferTypeFallback>;
 
+  type MLTensorTypeFallback = { destroy(): void };
   /**
    * type alias for WebNN MLTensor
    *
    * The specification for WebNN's MLTensor is currently in flux.
    */
-  export type MLTensorType = unknown;
+  export type MLTensorType = TryGetGlobalType<'MLTensor', MLTensorTypeFallback>;
 
   /**
    * supported data types for constructing a tensor from a WebGPU buffer
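What the switch from `unknown` to `TryGetGlobalType<...>` buys consumers: the sketch below assumes "@webgpu/types" is installed as a dev dependency, in which case `env.webgpu.device` and `Tensor.GpuBufferType` resolve to `GPUDevice`/`GPUBuffer` with no casts; without it, the structural fallbacks still compile.

```typescript
import { env, Tensor } from 'onnxruntime-common';

// With "@webgpu/types" installed, this is a GPUDevice -- no `as GPUDevice` cast needed.
// (The property is only populated after the first WebGPU inference session is created.)
const device = env.webgpu.device;
console.log(device);

// Tensor.GpuBufferType resolves to GPUBuffer when the types package is present,
// otherwise to the structural fallback { size; mapState }. Either way this compiles:
function gpuBufferInfo(tensor: Tensor): { size: number; mapped: boolean } {
  const buffer = tensor.gpuBuffer; // throws if the tensor data is not held in a GPU buffer
  return { size: buffer.size, mapped: buffer.mapState === 'mapped' };
}
```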
diff --git a/js/common/lib/training-session-impl.ts b/js/common/lib/training-session-impl.ts
deleted file mode 100644
index 21dbe5fe51bb9..0000000000000
--- a/js/common/lib/training-session-impl.ts
+++ /dev/null
@@ -1,273 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-import { resolveBackendAndExecutionProviders } from './backend-impl.js';
-import { SessionHandler, TrainingSessionHandler } from './backend.js';
-import { InferenceSession as InferenceSession } from './inference-session.js';
-import { OnnxValue } from './onnx-value.js';
-import { Tensor } from './tensor.js';
-import { TrainingSession as TrainingSessionInterface, TrainingSessionCreateOptions } from './training-session.js';
-
-type SessionOptions = InferenceSession.SessionOptions;
-type FeedsType = InferenceSession.FeedsType;
-type FetchesType = InferenceSession.FetchesType;
-type ReturnType = InferenceSession.ReturnType;
-type RunOptions = InferenceSession.RunOptions;
-
-const noBackendErrMsg: string =
-  'Training backend could not be resolved. ' + "Make sure you're using the correct configuration & WebAssembly files.";
-
-export class TrainingSession implements TrainingSessionInterface {
-  private constructor(handler: TrainingSessionHandler, hasOptimizerModel: boolean, hasEvalModel: boolean) {
-    this.handler = handler;
-    this.hasOptimizerModel = hasOptimizerModel;
-    this.hasEvalModel = hasEvalModel;
-  }
-  private handler: TrainingSessionHandler;
-  private hasOptimizerModel: boolean;
-  private hasEvalModel: boolean;
-
-  get trainingInputNames(): readonly string[] {
-    return this.handler.inputNames;
-  }
-  get trainingOutputNames(): readonly string[] {
-    return this.handler.outputNames;
-  }
-
-  get evalInputNames(): readonly string[] {
-    if (this.hasEvalModel) {
-      return this.handler.evalInputNames;
-    } else {
-      throw new Error('This training session has no evalModel loaded.');
-    }
-  }
-  get evalOutputNames(): readonly string[] {
-    if (this.hasEvalModel) {
-      return this.handler.evalOutputNames;
-    } else {
-      throw new Error('This training session has no evalModel loaded.');
-    }
-  }
-
-  static async create(
-    trainingOptions: TrainingSessionCreateOptions,
-    sessionOptions?: SessionOptions,
-  ): Promise<TrainingSession> {
-    const evalModel: string | Uint8Array = trainingOptions.evalModel || '';
-    const optimizerModel: string | Uint8Array = trainingOptions.optimizerModel || '';
-    const options: SessionOptions = sessionOptions || {};
-
-    // resolve backend, update session options with validated EPs, and create session handler
-    const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
-    if (backend.createTrainingSessionHandler) {
-      const handler = await backend.createTrainingSessionHandler(
-        trainingOptions.checkpointState,
-        trainingOptions.trainModel,
-        evalModel,
-        optimizerModel,
-        optionsWithValidatedEPs,
-      );
-      return new TrainingSession(handler, !!trainingOptions.optimizerModel, !!trainingOptions.evalModel);
-    } else {
-      throw new Error(noBackendErrMsg);
-    }
-  }
-
-  /**
-   * Helper function for runTrainStep and future runStep methods that handles the type-narrowing conversion from
-   * the given parameters to SessionHandler.FetchesType and RunOptions.
-   *
-   * @param inputNames the feeds object is checked that they contain all input names in the provided list of input
-   * names.
-   * @param outputNames the fetches object is checked that their keys match up with valid names in the list of output
-   * names.
-   * @param feeds the required input
-   * @param arg1 narrowed & converted into the SessionHandler.FetchesType or RunOptions object
-   * @param arg2 optional RunOptions object.
- * @returns - */ - typeNarrowingForRunStep( - inputNames: readonly string[], - outputNames: readonly string[], - feeds: FeedsType, - arg1?: FetchesType | RunOptions, - arg2?: RunOptions, - ): [SessionHandler.FetchesType, RunOptions] { - const fetches: { [name: string]: OnnxValue | null } = {}; - let options: RunOptions = {}; - // check inputs - if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) { - throw new TypeError( - "'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.", - ); - } - - let isFetchesEmpty = true; - // determine which override is being used - if (typeof arg1 === 'object') { - if (arg1 === null) { - throw new TypeError('Unexpected argument[1]: cannot be null.'); - } - if (arg1 instanceof Tensor) { - throw new TypeError("'fetches' cannot be a Tensor"); - } - - if (Array.isArray(arg1)) { - if (arg1.length === 0) { - throw new TypeError("'fetches' cannot be an empty array."); - } - isFetchesEmpty = false; - // output names - for (const name of arg1) { - if (typeof name !== 'string') { - throw new TypeError("'fetches' must be a string array or an object."); - } - if (outputNames.indexOf(name) === -1) { - throw new RangeError(`'fetches' contains invalid output name: ${name}.`); - } - fetches[name] = null; - } - - if (typeof arg2 === 'object' && arg2 !== null) { - options = arg2; - } else if (typeof arg2 !== 'undefined') { - throw new TypeError("'options' must be an object."); - } - } else { - // decide whether arg1 is fetches or options - // if any output name is present and its value is valid OnnxValue, we consider it fetches - let isFetches = false; - const arg1Keys = Object.getOwnPropertyNames(arg1); - for (const name of outputNames) { - if (arg1Keys.indexOf(name) !== -1) { - const v = (arg1 as InferenceSession.NullableOnnxValueMapType)[name]; - if (v === null || v instanceof Tensor) { - isFetches = true; - isFetchesEmpty = false; - fetches[name] = v; - } - } - } - - if (isFetches) { - if (typeof arg2 === 'object' && arg2 !== null) { - options = arg2; - } else if (typeof arg2 !== 'undefined') { - throw new TypeError("'options' must be an object."); - } - } else { - options = arg1 as RunOptions; - } - } - } else if (typeof arg1 !== 'undefined') { - throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'."); - } - - // check if all inputs are in feed - for (const name of inputNames) { - if (typeof feeds[name] === 'undefined') { - throw new Error(`input '${name}' is missing in 'feeds'.`); - } - } - - // if no fetches is specified, we use the full output names list - if (isFetchesEmpty) { - for (const name of outputNames) { - fetches[name] = null; - } - } - - return [fetches, options]; - } - - /** - * Helper method for runTrainStep and any other runStep methods. Takes the ReturnType result from the SessionHandler - * and changes it into a map of Tensors. 
- * - * @param results - * @returns - */ - convertHandlerReturnTypeToMapOfTensors(results: SessionHandler.ReturnType): ReturnType { - const returnValue: { [name: string]: OnnxValue } = {}; - for (const key in results) { - if (Object.hasOwnProperty.call(results, key)) { - const result = results[key]; - if (result instanceof Tensor) { - returnValue[key] = result; - } else { - returnValue[key] = new Tensor(result.type, result.data, result.dims); - } - } - } - return returnValue; - } - - async lazyResetGrad(): Promise { - await this.handler.lazyResetGrad(); - } - - runTrainStep(feeds: FeedsType, options?: RunOptions): Promise; - runTrainStep(feeds: FeedsType, fetches: FetchesType, options?: RunOptions): Promise; - async runTrainStep(feeds: FeedsType, arg1?: FetchesType | RunOptions, arg2?: RunOptions): Promise { - const [fetches, options] = this.typeNarrowingForRunStep( - this.trainingInputNames, - this.trainingOutputNames, - feeds, - arg1, - arg2, - ); - const results = await this.handler.runTrainStep(feeds, fetches, options); - return this.convertHandlerReturnTypeToMapOfTensors(results); - } - - async runOptimizerStep(options?: InferenceSession.RunOptions | undefined): Promise { - if (this.hasOptimizerModel) { - await this.handler.runOptimizerStep(options || {}); - } else { - throw new Error('This TrainingSession has no OptimizerModel loaded.'); - } - } - - runEvalStep(feeds: FeedsType, options?: RunOptions | undefined): Promise; - runEvalStep(feeds: FeedsType, fetches: FetchesType, options?: RunOptions | undefined): Promise; - async runEvalStep(feeds: FeedsType, arg1?: FetchesType | RunOptions, arg2?: RunOptions): Promise { - if (this.hasEvalModel) { - const [fetches, options] = this.typeNarrowingForRunStep( - this.evalInputNames, - this.evalOutputNames, - feeds, - arg1, - arg2, - ); - const results = await this.handler.runEvalStep(feeds, fetches, options); - return this.convertHandlerReturnTypeToMapOfTensors(results); - } else { - throw new Error('This TrainingSession has no EvalModel loaded.'); - } - } - - async getParametersSize(trainableOnly = true): Promise { - return this.handler.getParametersSize(trainableOnly); - } - - async loadParametersBuffer(array: Uint8Array, trainableOnly = true): Promise { - const paramsSize = await this.getParametersSize(trainableOnly); - // checking that the size of the Uint8Array is equivalent to the byte length of a Float32Array of the number - // of parameters - if (array.length !== 4 * paramsSize) { - throw new Error( - 'Size of the buffer passed into loadParametersBuffer must match the number of parameters in ' + - 'the model. Please use getParametersSize method to check.', - ); - } - return this.handler.loadParametersBuffer(array, trainableOnly); - } - - async getContiguousParameters(trainableOnly = true): Promise { - return this.handler.getContiguousParameters(trainableOnly); - } - - async release(): Promise { - return this.handler.dispose(); - } -} diff --git a/js/common/lib/training-session.ts b/js/common/lib/training-session.ts deleted file mode 100644 index 45dcafc46deb5..0000000000000 --- a/js/common/lib/training-session.ts +++ /dev/null @@ -1,206 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -import { InferenceSession } from './inference-session.js'; -import { OnnxValue } from './onnx-value.js'; -import { TrainingSession as TrainingSessionImpl } from './training-session-impl.js'; - -/* eslint-disable @typescript-eslint/no-redeclare */ - -export declare namespace TrainingSession { - /** - * Either URI file path (string) or Uint8Array containing model or checkpoint information. - */ - type UriOrBuffer = string | Uint8Array; -} - -/** - * Represent a runtime instance of an ONNX training session, - * which contains a model that can be trained, and, optionally, - * an eval and optimizer model. - */ -export interface TrainingSession { - // #region run() - - /** - * Lazily resets the gradients of all trainable parameters to zero. Should happen after the invocation of - * runOptimizerStep. - */ - lazyResetGrad(): Promise; - - /** - * Run TrainStep asynchronously with the given feeds and options. - * - * @param feeds - Representation of the model input. See type description of `InferenceSession.InputType` for - detail. - * @param options - Optional. A set of options that controls the behavior of model training. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding values. - */ - runTrainStep( - feeds: InferenceSession.FeedsType, - options?: InferenceSession.RunOptions, - ): Promise; - - /** - * Run a single train step with the given inputs and options. - * - * @param feeds - Representation of the model input. - * @param fetches - Representation of the model output. - * detail. - * @param options - Optional. A set of options that controls the behavior of model training. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding - values. - */ - runTrainStep( - feeds: InferenceSession.FeedsType, - fetches: InferenceSession.FetchesType, - options?: InferenceSession.RunOptions, - ): Promise; - - /** - * Runs a single optimizer step, which performs weight updates for the trainable parameters using the optimizer model. - * - * @param options - Optional. A set of options that controls the behavior of model optimizing. - */ - runOptimizerStep(options?: InferenceSession.RunOptions): Promise; - - /** - * Run a single eval step with the given inputs and options using the eval model. - * - * @param feeds - Representation of the model input. - * @param options - Optional. A set of options that controls the behavior of model eval step. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding - values. - */ - runEvalStep( - feeds: InferenceSession.FeedsType, - options?: InferenceSession.RunOptions, - ): Promise; - - /** - * Run a single eval step with the given inputs and options using the eval model. - * - * @param feeds - Representation of the model input. - * @param fetches - Representation of the model output. - * detail. - * @param options - Optional. A set of options that controls the behavior of model eval step. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding - values. - */ - runEvalStep( - feeds: InferenceSession.FeedsType, - fetches: InferenceSession.FetchesType, - options?: InferenceSession.RunOptions, - ): Promise; - - // #endregion - - // #region copy parameters - - /** - * Retrieves the size of all parameters for the training state. Calculates the total number of primitive (datatype of - * the parameters) elements of all the parameters in the training state. 
- *
- * @param trainableOnly - When set to true, the size is calculated for trainable params only. Default value is true.
- */
-  getParametersSize(trainableOnly: boolean): Promise<number>;
-
-  /**
-   * Copies parameter values from the given buffer to the training state. Currently, only supporting models with
-   * parameters of type Float32.
-   *
-   * @param buffer - A Uint8Array representation of Float32 parameters.
-   * @param trainableOnly - True if trainable parameters only to be modified, false otherwise. Default value is true.
-   */
-  loadParametersBuffer(buffer: Uint8Array, trainableOnly: boolean): Promise<void>;
-
-  /**
-   * Copies the model parameters to a contiguous buffer. Usually used in the context of Federated Learning.
-   * Currently, only supporting models with parameters of type Float32.
-   *
-   * @param trainableOnly - When set to true, only trainable parameters are copied. Trainable parameters are parameters
-   * for which requires_grad is set to true. Default value is true.
-   * @returns A promise that resolves to a Float32 OnnxValue of the requested parameters.
-   */
-  getContiguousParameters(trainableOnly: boolean): Promise<OnnxValue>;
-  // #endregion
-
-  // #region release()
-
-  /**
-   * Release the inference session and the underlying resources.
-   */
-  release(): Promise<void>;
-  // #endregion
-
-  // #region metadata
-
-  /**
-   * Get input names of the loaded training model.
-   */
-  readonly trainingInputNames: readonly string[];
-
-  /**
-   * Get output names of the loaded training model.
-   */
-  readonly trainingOutputNames: readonly string[];
-
-  /**
-   * Get input names of the loaded eval model. Is an empty array if no eval model is loaded.
-   */
-  readonly evalInputNames: readonly string[];
-
-  /**
-   * Get output names of the loaded eval model. Is an empty array if no eval model is loaded.
-   */
-  readonly evalOutputNames: readonly string[];
-
-  // #endregion
-}
-
-/**
- * Represents the optional parameters that can be passed into the TrainingSessionFactory.
- */
-export interface TrainingSessionCreateOptions {
-  /**
-   * URI or buffer for a .ckpt file that contains the checkpoint for the training model.
-   */
-  checkpointState: TrainingSession.UriOrBuffer;
-  /**
-   * URI or buffer for the .onnx training file.
-   */
-  trainModel: TrainingSession.UriOrBuffer;
-  /**
-   * Optional. URI or buffer for the .onnx optimizer model file.
-   */
-  optimizerModel?: TrainingSession.UriOrBuffer;
-  /**
-   * Optional. URI or buffer for the .onnx eval model file.
-   */
-  evalModel?: TrainingSession.UriOrBuffer;
-}
-
-/**
- * Defines method overload possibilities for creating a TrainingSession.
- */
-export interface TrainingSessionFactory {
-  // #region create()
-
-  /**
-   * Creates a new TrainingSession and asynchronously loads any models passed in through trainingOptions
-   *
-   * @param trainingOptions specify models and checkpoints to load into the Training Session
-   * @param sessionOptions specify configuration for training session behavior
-   *
-   * @returns Promise that resolves to a TrainingSession object
-   */
-  create(
-    trainingOptions: TrainingSessionCreateOptions,
-    sessionOptions?: InferenceSession.SessionOptions,
-  ): Promise<TrainingSession>;
-
-  // #endregion
-}
-
-// eslint-disable-next-line @typescript-eslint/naming-convention
-export const TrainingSession: TrainingSessionFactory = TrainingSessionImpl;
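Dropping `training-session.ts` (together with its export from `index.ts` above) is a breaking change for anyone importing `TrainingSession` from `onnxruntime-common`; only the inference surface remains. A hedged before/after sketch for downstream code (the model path and single-input assumption are illustrative):

```typescript
// Before this change (no longer compiles -- the export is gone):
// import { TrainingSession } from 'onnxruntime-common';

// After: only the inference API is exported.
import { InferenceSession, Tensor } from 'onnxruntime-common';

async function runModel(modelPath: string, input: Tensor) {
  const session = await InferenceSession.create(modelPath);
  const feeds = { [session.inputNames[0]]: input }; // assumes a single-input model
  return session.run(feeds);
}
```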
diff --git a/js/common/lib/type-helper.ts b/js/common/lib/type-helper.ts
new file mode 100644
index 0000000000000..845ba3018d443
--- /dev/null
+++ b/js/common/lib/type-helper.ts
@@ -0,0 +1,31 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+/**
+ * A helper type to get certain types if they are declared in global scope.
+ *
+ * For example, if you installed "@webgpu/types" as a dev dependency, then `TryGetGlobalType<'GPUDevice'>` will
+ * be type `GPUDevice`; otherwise it will be type `unknown`.
+ *
+ *
+ * We don't want to introduce "@webgpu/types" as a dependency of this package because:
+ *
+ * (1) For JavaScript users, it's not needed. For TypeScript users, they can install it as a dev dependency themselves.
+ *
+ * (2) "@webgpu/types" requires "@types/dom-webcodecs" as a peer dependency when using TypeScript < v5.1, and its
+ * version needs to be chosen carefully according to the TypeScript version being used. This means so far there is no
+ * way to keep every TypeScript version happy. It turns out that we could easily break users on some TypeScript
+ * versions.
+ *
+ * For more info see https://github.com/gpuweb/types/issues/127
+ *
+ * Update (2024-08-07): Reason (2) may no longer be valid. Most people should be using TypeScript >= 5.1 by now.
+ * However, we are still not sure whether introducing "@webgpu/types" as a direct dependency is a good idea. We find
+ * this type helper useful for TypeScript users.
+ *
+ * @ignore
+ */
+export type TryGetGlobalType<Name extends string, Fallback = unknown> = typeof globalThis extends {
+  [k in Name]: { prototype: infer T };
+}
+  ? T
+  : Fallback;
diff --git a/js/common/typedoc.json b/js/common/typedoc.json
index 088c7ba4053e6..f9c7e7b19db41 100644
--- a/js/common/typedoc.json
+++ b/js/common/typedoc.json
@@ -1,6 +1,7 @@
 {
   "entryPoints": ["lib/index.ts"],
   "excludeInternal": true,
+  "intentionallyNotExported": ["TryGetGlobalType"],
   "name": "ONNX Runtime JavaScript API",
   "readme": "none",
   "cleanOutputDir": true
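A small self-contained illustration of how the conditional type resolves (a local copy of the helper; `A`/`B` are illustrative names):

```typescript
// Local copy of the helper for illustration.
type TryGetGlobalType<Name extends string, Fallback = unknown> = typeof globalThis extends {
  [k in Name]: { prototype: infer T };
}
  ? T
  : Fallback;

// 'Uint8Array' is always declared on globalThis, so this resolves to the instance type:
type A = TryGetGlobalType<'Uint8Array'>; // Uint8Array

// Without "@webgpu/types" installed, 'GPUDevice' is not declared, so the fallback applies:
type B = TryGetGlobalType<'GPUDevice', { label: string }>; // { label: string }
```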
"sha512-LhLcE7Hbiryz8oMDdDptSrWowmB4Bl6RCt6sIJKpRB4XtVf0iEgewX3au/pJqm+Py1kCASkb/FFKjxQaLtxJvw==", "dev": true, "requires": { - "follow-redirects": "^1.15.0", + "follow-redirects": "^1.15.6", "form-data": "^4.0.0", "proxy-from-env": "^1.1.0" } @@ -1725,9 +1725,9 @@ "dev": true }, "cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "requires": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", diff --git a/js/node/script/install.js b/js/node/script/install.js index b15bc03840599..fef93f9169a2c 100644 --- a/js/node/script/install.js +++ b/js/node/script/install.js @@ -21,6 +21,7 @@ const os = require('os'); const fs = require('fs'); const path = require('path'); const tar = require('tar'); +const { execFileSync } = require('child_process'); const { Readable } = require('stream'); // commandline flag: @@ -58,10 +59,23 @@ if (NO_INSTALL || !shouldInstall) { // Step.2: Download the required binaries const artifactUrl = { - 11: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-${ - ORT_VERSION - }.tgz`, - 12: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-cuda12-${ + get 11() { + // TODO: support ORT Cuda v11 binaries + throw new Error(`CUDA 11 binaries are not supported by this script yet. + +To use ONNX Runtime Node.js binding with CUDA v11 support, please follow the manual steps: + +1. Use "--onnxruntime-node-install-cuda=skip" to skip the auto installation. +2. Navigate to https://aiinfra.visualstudio.com/PublicPackages/_artifacts/feed/onnxruntime-cuda-11 +3. Download the binaries for your platform and architecture +4. Extract the following binaries to "node_modules/onnxruntime-node/bin/napi-v3/linux/x64: + - libonnxruntime_providers_tensorrt.so + - libonnxruntime_providers_shared.so + - libonnxruntime.so.${ORT_VERSION} + - libonnxruntime_providers_cuda.so +`); + }, + 12: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-${ ORT_VERSION }.tgz`, }[INSTALL_CUDA_FLAG || tryGetCudaVersion()]; @@ -108,9 +122,27 @@ Use "--onnxruntime-node-install-cuda=skip" to skip the installation. You will st function tryGetCudaVersion() { // Should only return 11 or 12. - // TODO: try to get the CUDA version from the system ( `nvcc --version` ) + // try to get the CUDA version from the system ( `nvcc --version` ) + let ver = 12; + try { + const nvccVersion = execFileSync('nvcc', ['--version'], { encoding: 'utf8' }); + const match = nvccVersion.match(/release (\d+)/); + if (match) { + ver = parseInt(match[1]); + if (ver !== 11 && ver !== 12) { + throw new Error(`Unsupported CUDA version: ${ver}`); + } + } + } catch (e) { + if (e?.code === 'ENOENT') { + console.warn('`nvcc` not found. 
diff --git a/js/node/tsconfig.json b/js/node/tsconfig.json
index c154c3e148ed0..0401fb9609ad6 100644
--- a/js/node/tsconfig.json
+++ b/js/node/tsconfig.json
@@ -1,7 +1,8 @@
 {
   "extends": "../tsconfig.json",
   "compilerOptions": {
-    "outDir": "dist"
+    "outDir": "dist",
+    "declaration": true
   },
   "include": ["lib"]
 }
diff --git a/js/package-lock.json b/js/package-lock.json
index 594d0584ad80e..f4401c6e98c75 100644
--- a/js/package-lock.json
+++ b/js/package-lock.json
@@ -1573,9 +1573,9 @@
       "dev": true
     },
     "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
       "dev": true,
       "dependencies": {
         "path-key": "^3.1.0",
@@ -5922,9 +5922,9 @@
       "dev": true
     },
     "cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
       "dev": true,
       "requires": {
         "path-key": "^3.1.0",
diff --git a/js/react_native/android/build.gradle b/js/react_native/android/build.gradle
index 825990eba0fb8..521866ff0f3e2 100644
--- a/js/react_native/android/build.gradle
+++ b/js/react_native/android/build.gradle
@@ -7,7 +7,7 @@ buildscript {
     }
     dependencies {
-        classpath 'com.android.tools.build:gradle:4.1.2'
+        classpath 'com.android.tools.build:gradle:7.4.2'
         // noinspection DifferentKotlinGradleVersion
     }
 }
@@ -221,9 +221,8 @@ dependencies {
     api "com.facebook.react:react-native:" + REACT_NATIVE_VERSION
     api "org.mockito:mockito-core:2.28.2"
 
-    androidTestImplementation "androidx.test:runner:1.1.0"
-    androidTestImplementation "androidx.test:rules:1.1.0"
-
+    androidTestImplementation "androidx.test:runner:1.5.2"
+    androidTestImplementation "androidx.test:rules:1.5.0"
     implementation "junit:junit:4.12"
     androidTestImplementation "com.linkedin.dexmaker:dexmaker-mockito-inline-extended:2.28.1"
diff --git a/js/react_native/android/gradle.properties b/js/react_native/android/gradle.properties
index 465b04d1f5813..8fe6e40d76911 100644
--- a/js/react_native/android/gradle.properties
+++ b/js/react_native/android/gradle.properties
@@ -4,7 +4,7 @@
 # Specifies the JVM arguments used for the daemon process.
 # The setting is particularly useful for tweaking memory settings.
 # Default value: -Xmx1024m -XX:MaxPermSize=256m
-# org.gradle.jvmargs=-Xmx2048m -XX:MaxPermSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
+org.gradle.jvmargs=-Xmx4096m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
 #
 # When configured, Gradle will run in incubating parallel mode.
 # This option should only be used with decoupled projects.
More details, visit diff --git a/js/react_native/android/gradle/wrapper/gradle-wrapper.jar b/js/react_native/android/gradle/wrapper/gradle-wrapper.jar index 62d4c053550b9..249e5832f090a 100644 Binary files a/js/react_native/android/gradle/wrapper/gradle-wrapper.jar and b/js/react_native/android/gradle/wrapper/gradle-wrapper.jar differ diff --git a/js/react_native/android/gradle/wrapper/gradle-wrapper.properties b/js/react_native/android/gradle/wrapper/gradle-wrapper.properties index 51d930a381f3a..012d6d90445b4 100644 --- a/js/react_native/android/gradle/wrapper/gradle-wrapper.properties +++ b/js/react_native/android/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=7faa7198769f872826c8ef4f1450f839ec27f0b4d5d1e51bade63667cbccd205 -distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip +distributionSha256Sum=cb87f222c5585bd46838ad4db78463a5c5f3d336e5e2b98dc7c0c586527351c2 +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/js/react_native/android/gradlew b/js/react_native/android/gradlew index fbd7c515832da..a69d9cb6c2065 100755 --- a/js/react_native/android/gradlew +++ b/js/react_native/android/gradlew @@ -1,7 +1,7 @@ -#!/usr/bin/env sh +#!/bin/sh # -# Copyright 2015 the original author or authors. +# Copyright © 2015-2021 the original authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,67 +17,101 @@ # ############################################################################## -## -## Gradle start up script for UN*X -## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. 
+# +# You can find Gradle at https://github.com/gradle/gradle/. +# ############################################################################## # Attempt to set APP_HOME + # Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` +APP_BASE_NAME=${0##*/} # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum warn () { echo "$*" -} +} >&2 die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). cygwin=false msys=false darwin=false nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar @@ -87,9 +121,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -98,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" + JAVACMD=java which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the @@ -106,80 +140,101 @@ location of your Java installation." fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi +if ! "$cygwin" && ! "$darwin" && ! 
"$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac fi -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. # For Cygwin or MSYS, switch paths to Windows format before running java -if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) fi - i=`expr $i + 1` + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. 
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
     done
-    case $i in
-        0) set -- ;;
-        1) set -- "$args0" ;;
-        2) set -- "$args0" "$args1" ;;
-        3) set -- "$args0" "$args1" "$args2" ;;
-        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
-        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
-        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
-        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
-        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
-        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
-    esac
 fi
-# Escape application args
-save () {
-    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
-    echo " "
-}
-APP_ARGS=`save "$@"`
+# Collect all arguments for the java command;
+#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+#     shell script including quotes and variable substitutions, so put them in
+#     double quotes to make sure that they get re-expanded; and
+#   * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+    die "xargs is not available"
+fi
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor process substitution (the `<(...)`
+# form above), so instead we post-process each arg (as a line of input to sed)
+# to backslash-escape any character that might be a shell metacharacter, then
+# use eval to reverse that process (while maintaining the separation between
+# arguments), and wrap the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
-# Collect all arguments for the java command, following the shell quoting and substitution rules
-eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
 
 exec "$JAVACMD" "$@"
diff --git a/js/react_native/android/gradlew.bat b/js/react_native/android/gradlew.bat
index 5093609d512a9..f127cfd49d402 100644
--- a/js/react_native/android/gradlew.bat
+++ b/js/react_native/android/gradlew.bat
@@ -14,7 +14,7 @@
 @rem limitations under the License.
 @rem
 
-@if "%DEBUG%" == "" @echo off
+@if "%DEBUG%"=="" @echo off
 @rem ##########################################################################
 @rem
 @rem  Gradle startup script for Windows
 @rem
 @rem ##########################################################################
@@ -25,7 +25,7 @@ if "%OS%"=="Windows_NT" setlocal
 
 set DIRNAME=%~dp0
-if "%DIRNAME%" == "" set DIRNAME=.
+if "%DIRNAME%"=="" set DIRNAME=.
 set APP_BASE_NAME=%~n0
 set APP_HOME=%DIRNAME%
@@ -40,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome
 
 set JAVA_EXE=java.exe
 %JAVA_EXE% -version >NUL 2>&1
-if "%ERRORLEVEL%" == "0" goto init
+if %ERRORLEVEL% equ 0 goto execute
 
 echo.
 echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
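@rem [Editor's note] The switch just above from `if "%ERRORLEVEL%" == "0"` to
@rem `if %ERRORLEVEL% equ 0` replaces a string comparison with a numeric one.
@rem A minimal sketch of the difference (hypothetical snippet, not part of
@rem this patch):
@rem
@rem   cmd /c exit 0
@rem   if %ERRORLEVEL% equ 0 echo printed: equ parses both sides as numbers
@rem   if "%ERRORLEVEL%"=="00" echo not printed: string compare is literal
@rem
@rem With `equ`, values such as 0 and 00 compare equal, so the check is robust
@rem to how the exit code happens to be formatted.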
@@ -54,7 +54,7 @@ goto fail set JAVA_HOME=%JAVA_HOME:"=% set JAVA_EXE=%JAVA_HOME%/bin/java.exe -if exist "%JAVA_EXE%" goto init +if exist "%JAVA_EXE%" goto execute echo. echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% @@ -64,21 +64,6 @@ echo location of your Java installation. goto fail -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - :execute @rem Setup the command line @@ -86,17 +71,19 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/js/react_native/e2e/android/app/build.gradle b/js/react_native/e2e/android/app/build.gradle index 8a84b0d5065a8..526259e3f8d8f 100644 --- a/js/react_native/e2e/android/app/build.gradle +++ b/js/react_native/e2e/android/app/build.gradle @@ -193,7 +193,7 @@ dependencies { implementation "com.facebook.react:react-native:+" // From node_modules implementation "androidx.swiperefreshlayout:swiperefreshlayout:1.0.0" - implementation 'androidx.test.ext:junit:1.1.3' + implementation 'androidx.test.ext:junit:1.1.5' debugImplementation("com.facebook.flipper:flipper:${FLIPPER_VERSION}") { exclude group:'com.facebook.fbjni' } @@ -213,9 +213,9 @@ dependencies { implementation jscFlavor } - androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0' - androidTestImplementation 'androidx.test:runner:1.4.0' - androidTestImplementation 'androidx.test:rules:1.4.0' + androidTestImplementation "androidx.test.espresso:espresso-core:3.5.0" + androidTestImplementation "androidx.test:runner:1.5.2" + androidTestImplementation "androidx.test:rules:1.5.0" implementation project(':onnxruntime-react-native') // specify ORT dependency here so it can be found in libs flatDir repository diff --git a/js/web/docs/webgpu-operators.md b/js/web/docs/webgpu-operators.md index f63cf17aa4df3..5c8748d75c2bc 100644 --- a/js/web/docs/webgpu-operators.md +++ b/js/web/docs/webgpu-operators.md @@ -50,6 +50,7 @@ Do not modify directly.* | Gather | ai.onnx(1-10,11-12,13+) | | | GatherBlockQuantized | com.microsoft(1+) | | | GatherElements | ai.onnx(11-12,13+) | | +| GatherND | ai.onnx(11,12,13+) | | | Gelu | ai.onnx(20+); com.microsoft(1+) | | | Gemm | ai.onnx(7-8,9-10,11-12,13+) | | | GlobalAveragePool | ai.onnx(1+); com.ms.internal.nhwc(1+) | | diff --git a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts index 28af5d461abe0..6c7afbc7365bb 100644 --- a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts +++ 
b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts @@ -16,6 +16,7 @@ import { einsum, parseEinsumAttributes } from './ops/einsum'; import { expand } from './ops/expand'; import { fastGelu } from './ops/fast-gelu'; import { gather, parseGatherAttributes } from './ops/gather'; +import { gatherND, parseGatherNDAttributes } from './ops/gather-nd'; import { gatherBlockQuantized, parseGatherBlockQuantizedAttributes } from './ops/gather-block-quantized'; import { gatherElements, parseGatherElementsAttributes } from './ops/gather-elements'; import { gemm, parseGemmAttributes } from './ops/gemm'; @@ -100,6 +101,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map = new ['Gather', [gather, parseGatherAttributes]], ['GatherElements', [gatherElements, parseGatherElementsAttributes]], ['GatherBlockQuantized', [gatherBlockQuantized, parseGatherBlockQuantizedAttributes]], + ['GatherND', [gatherND, parseGatherNDAttributes]], ['Gelu', [unaryOps.gelu]], ['Gemm', [gemm, parseGemmAttributes]], ['GlobalAveragePool', [pool.globalAveragePool, pool.parseGlobalAveragePoolAttributes]], diff --git a/js/web/lib/wasm/jsep/webgpu/ops/gather-nd.ts b/js/web/lib/wasm/jsep/webgpu/ops/gather-nd.ts new file mode 100644 index 0000000000000..43b51f6e94a66 --- /dev/null +++ b/js/web/lib/wasm/jsep/webgpu/ops/gather-nd.ts @@ -0,0 +1,179 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import { DataType } from '../../../wasm-common'; +import { TensorView } from '../../tensor-view'; +import { ShapeUtil } from '../../util'; +import { AttributeWithCacheKey } from '../attribute-with-cache-key'; +import { ComputeContext, ProgramUniform } from '../types'; + +import { createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper, UniformsArrayType } from './common'; + +export interface GatherNDAttributes extends AttributeWithCacheKey { + readonly batchDims: number; +} + +const computeSliceOffsets = ( + context: ComputeContext, + indicesData: TensorView, + sizesFromSliceDimsData: number[], + batchDims: number, + inputDims: readonly number[], + numSlices: number, + numSlicesPerBatch: number, + inputBatchStride: number, + numSliceDims: number, +) => { + const programUniforms: ProgramUniform[] = [ + { type: DataType.uint32, data: numSlices }, + { type: DataType.uint32, data: batchDims }, + { type: DataType.uint32, data: inputDims }, + { type: DataType.uint32, data: sizesFromSliceDimsData }, + { type: DataType.uint32, data: numSlicesPerBatch }, + { type: DataType.uint32, data: inputBatchStride }, + { type: DataType.uint32, data: numSliceDims }, + ]; + + const outputShape = [numSlices]; + programUniforms.push(...createTensorShapeVariables(indicesData.dims, outputShape)); + + const getShaderSource = (shaderHelper: ShaderHelper) => { + const indices = inputVariable('indices_data', indicesData.dataType, indicesData.dims.length); + const output = outputVariable('input_slice_offsets_data', DataType.uint32, 1, 1); + const variables = [indices, output]; + const uniforms: UniformsArrayType = [ + { name: 'output_size', type: 'u32' }, + { name: 'batch_dims', type: 'u32' }, + { name: 'input_dims', type: 'u32', length: inputDims.length }, + { name: 'sizes_from_slice_dims_data', type: 'u32', length: sizesFromSliceDimsData.length }, + { name: 'num_slices_per_batch', type: 'u32' }, + { name: 'input_batch_stride', type: 'u32' }, + { name: 'num_slice_dims', type: 'u32' }, + ]; + return ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)} + ${shaderHelper.mainStart()} + 
${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')} + let batch_idx = global_idx / uniforms.num_slices_per_batch; + let base_offset = batch_idx * uniforms.input_batch_stride; + + let slice_indices_base_offset = global_idx * uniforms.num_slice_dims; + var relative_slice_offset = 0; + for (var dim_idx = 0u; dim_idx < uniforms.num_slice_dims; dim_idx ++) { + var index = i32(indices_data[dim_idx + slice_indices_base_offset].x); + let input_dim_idx = uniforms.batch_dims + dim_idx; + if (index < 0) { + ${ + inputDims.length === 1 + ? 'index += i32(uniforms.input_dims);' + : 'index += i32(uniforms.input_dims[input_dim_idx]);' + } + } + ${ + sizesFromSliceDimsData.length === 1 + ? 'relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data);' + : 'relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data[dim_idx]);' + } + } + + input_slice_offsets_data[global_idx] = base_offset + u32(relative_slice_offset); + }`; + }; + + return context.compute( + { + name: 'computeSliceOffsets', + shaderCache: { hint: `${inputDims.length}_${sizesFromSliceDimsData.length}`, inputDependencies: ['rank'] }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: context.inputs[1].dataType }], + dispatchGroup: { x: Math.ceil(numSlices / 64) }, + programUniforms, + }), + getShaderSource, + }, + { inputs: [indicesData], outputs: [-1] }, + )[0]; +}; + +export const gatherND = (context: ComputeContext, attributes: GatherNDAttributes) => { + const inputs = context.inputs; + const inputShape = inputs[0].dims; + const inputType = inputs[0].dataType; + const indicesShape = inputs[1].dims; + const numSliceDims = indicesShape[indicesShape.length - 1]; + const numSlices = ShapeUtil.sizeToDimension(indicesShape, indicesShape.length - 1); + const sliceSize = ShapeUtil.sizeFromDimension(inputShape, attributes.batchDims + numSliceDims); + const numBatches = ShapeUtil.sizeToDimension(inputShape, attributes.batchDims); + const inputBatchStride = ShapeUtil.sizeFromDimension(inputShape, attributes.batchDims); + const numSlicesPerBatch = numSlices / numBatches; + const sizesFromSliceDims = new Array(numSliceDims); + let runningProduct = sliceSize; + for (let i = 0; i < numSliceDims; ++i) { + sizesFromSliceDims[numSliceDims - 1 - i] = runningProduct; + runningProduct *= inputShape[attributes.batchDims + numSliceDims - 1 - i]; + } + + const inputSliceOffsets = computeSliceOffsets( + context, + inputs[1], + sizesFromSliceDims, + attributes.batchDims, + inputShape, + numSlices, + numSlicesPerBatch, + inputBatchStride, + numSliceDims, + ); + + const lastIndicesDimension = attributes.batchDims + numSliceDims; + if (lastIndicesDimension > inputShape.length) { + throw new Error('last dimension of indices must not be larger than rank of input tensor'); + } + + const outputShape = indicesShape.slice(0, -1).concat(inputShape.slice(lastIndicesDimension)); + const outputSize = ShapeUtil.size(outputShape); + + const programUniforms: ProgramUniform[] = [ + { type: DataType.uint32, data: outputSize }, + { type: DataType.uint32, data: sliceSize }, + ...createTensorShapeVariables(inputs[0].dims, inputSliceOffsets.dims, outputShape), + ]; + + const getShaderSource = (shaderHelper: ShaderHelper) => { + const input = inputVariable('data', inputs[0].dataType, inputs[0].dims.length); + const indices = inputVariable('slice_offsets', DataType.uint32, inputSliceOffsets.dims.length); + + const output = outputVariable('output', inputs[0].dataType, outputShape.length); + return ` + ${shaderHelper + 
.registerUniform('output_size', 'u32')
+          .registerUniform('slice_size', 'u32')
+          .declareVariables(input, indices, output)}
+        ${shaderHelper.mainStart()}
+        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
+        let slice_offset = slice_offsets[global_idx / uniforms.slice_size];
+        output[global_idx] = data[u32(slice_offset) + global_idx % uniforms.slice_size];
+      }`;
+  };
+  context.compute(
+    {
+      name: 'GatherND',
+      shaderCache: { hint: attributes.cacheKey, inputDependencies: ['rank', 'rank'] },
+      getRunData: () => ({
+        outputs: [{ dims: outputShape, dataType: inputType }],
+        dispatchGroup: { x: Math.ceil(outputSize / 64 /* workgroup size */) },
+        programUniforms,
+      }),
+      getShaderSource,
+    },
+    { inputs: [inputs[0], inputSliceOffsets] },
+  );
+};
+
+export const parseGatherNDAttributes = (attributes: Record): GatherNDAttributes => {
+  const batchDims = attributes.batch_dims as number;
+  return {
+    batchDims,
+    cacheKey: '',
+  };
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts b/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
index 21225a77b189b..5059645211aea 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
@@ -29,7 +29,9 @@ const permFunctionBody = (perm: number[], rank: number, input: IndicesHelper, ou
   let reverseFunc = `fn perm(i: ${output.type.indices}) -> ${input.type.indices} {
     var a: ${input.type.indices};`;
   for (let i = 0; i < rank; ++i) {
-    reverseFunc += input.indicesSet('a', perm[i], `i[${i}]`);
+    // input and output indices here always have rank >= 2 (rank < 2 transposes
+    // are handled as reshapes), so indexing `a` and `i` directly is always valid.
+    reverseFunc += `a[${perm[i]}]=i[${i}];`;
   }
   return (reverseFunc += 'return a;}');
 };
@@ -71,7 +73,7 @@ export const createTransposeProgramInfo = (inputTensor: TensorView, permAttr: nu
   const outputShape = getOutputShape(inputTensor.dims, perm);
   let newInputShape = inputTensor.dims;
   let newOutputShape = outputShape;
-  const transposeAsReshape = isTransposeReshape(perm, inputTensor.dims);
+  const transposeAsReshape = inputRank < 2 || isTransposeReshape(perm, inputTensor.dims);
   let getShaderSource;
   if (transposeAsReshape) {
     getShaderSource = (shaderHelper: ShaderHelper) => {
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index 81d1b73efc9d4..da8939cd0263a 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
@@ -487,7 +487,7 @@ export const prepareInputOutputTensor = (
     }
 
     if (location === 'gpu-buffer') {
-      const gpuBuffer = tensor[2].gpuBuffer as GPUBuffer;
+      const gpuBuffer = tensor[2].gpuBuffer;
       dataByteLength = calculateTensorSizeInBytes(tensorDataTypeStringToEnum(dataType), dims)!;
 
       const registerBuffer = wasm.jsepRegisterBuffer;
diff --git a/js/web/package.json b/js/web/package.json
index 656cd7b56b039..181d6127f5455 100644
--- a/js/web/package.json
+++ b/js/web/package.json
@@ -83,7 +83,7 @@
       "types": "./types.d.ts"
     },
     "./wasm": {
-      "import": "./dist/ort.wasm.min.mjs",
+      "import": "./dist/ort.wasm.bundle.min.mjs",
       "require": "./dist/ort.wasm.min.js",
       "types": "./types.d.ts"
     },
diff --git a/js/web/script/build.ts b/js/web/script/build.ts
index 408f9e00a5cbd..529e9d1065e69 100644
--- a/js/web/script/build.ts
+++ b/js/web/script/build.ts
@@ -591,14 +591,14 @@ async function main() {
   // ort[.min].[m]js
   await addAllWebBuildTasks({
     outputName: 'ort',
-    define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true' },
+    define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true' },
   });
   //
ort.bundle.min.mjs await buildOrt({ isProduction: true, outputName: 'ort.bundle', format: 'esm', - define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true', 'BUILD_DEFS.DISABLE_DYNAMIC_IMPORT': 'true' }, + define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true', 'BUILD_DEFS.DISABLE_DYNAMIC_IMPORT': 'true' }, }); // ort.webgpu[.min].[m]js @@ -619,6 +619,13 @@ async function main() { outputName: 'ort.wasm', define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true', 'BUILD_DEFS.DISABLE_WEBGL': 'true' }, }); + // ort.wasm.bundle.min.mjs + await buildOrt({ + isProduction: true, + outputName: 'ort.wasm.bundle', + format: 'esm', + define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true', 'BUILD_DEFS.DISABLE_WEBGL': 'true' }, + }); // ort.webgl[.min].[m]js await addAllWebBuildTasks({ outputName: 'ort.webgl', diff --git a/js/web/test/data/ops/gather-nd.jsonc b/js/web/test/data/ops/gather-nd.jsonc new file mode 100644 index 0000000000000..209c7d1f74087 --- /dev/null +++ b/js/web/test/data/ops/gather-nd.jsonc @@ -0,0 +1,147 @@ +[ + { + "name": "GatherND int32", + "operator": "GatherND", + "attributes": [], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [100, 101, 102, 777, 778, 779, 1000, 1001, 1002], + "dims": [9], + "type": "int32" + }, + { + "data": [0, 4, 8], + "dims": [3, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [100, 778, 1002], + "dims": [3], + "type": "int32" + } + ] + } + ] + }, + { + "name": "GatherND float32", + "operator": "GatherND", + "attributes": [], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [100.1, 101.2, 102.3, 777.4, 778.5, 779.6, 1000.7, 1001.8, 1002.9], + "dims": [9], + "type": "float32" + }, + { + "data": [0, 4, 8], + "dims": [3, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [100.0999984741211, 778.5, 1002.9000244140625], + "dims": [3], + "type": "float32" + } + ] + } + ] + }, + { + "name": "GatherND int32 [2 2 2], batch_dims", + "operator": "GatherND", + "attributes": [{ "name": "batch_dims", "data": 1, "type": "int" }], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [0, 1, 2, 3, 4, 5, 6, 7], + "dims": [2, 2, 2], + "type": "int32" + }, + { + "data": [1, 0], + "dims": [2, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [2, 3, 4, 5], + "dims": [2, 2], + "type": "int32" + } + ] + } + ] + }, + { + "name": "GatherND float16", + "operator": "GatherND", + "attributes": [], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [100.1, 101.2, 102.3, 777.4, 778.5, 779.6, 1000.7, 1001.8, 1002.9], + "dims": [9], + "type": "float16" + }, + { + "data": [0, 4, 8], + "dims": [3, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [100.0999984741211, 778.5, 1002.9000244140625], + "dims": [3], + "type": "float16" + } + ] + } + ] + }, + { + "name": "GatherND uint32 [2 2 2], batch_dims", + "operator": "GatherND", + "attributes": [{ "name": "batch_dims", "data": 1, "type": "int" }], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [0, 1, 2, 3, 4, 5, 6, 7], + "dims": [2, 2, 2], + "type": "uint32" + }, + { + "data": [1, 0], + "dims": [2, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [2, 3, 4, 5], + "dims": [2, 2], + "type": "uint32" + } + ] + } + ] + } +] diff --git a/js/web/test/e2e/browser-test-wasm-binary-override.js b/js/web/test/e2e/browser-test-wasm-binary-override.js index 471c26f6990b5..27cce2ca06236 100644 --- 
a/js/web/test/e2e/browser-test-wasm-binary-override.js +++ b/js/web/test/e2e/browser-test-wasm-binary-override.js @@ -7,7 +7,7 @@ const documentUrl = document.currentScript.src; it('Browser E2E testing - WebAssembly backend', async function () { // preload .wasm file binary - const wasmUrl = new URL('./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.wasm', documentUrl).href; + const wasmUrl = new URL('./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm', documentUrl).href; const response = await fetch(wasmUrl); // make sure the .wasm file is loaded successfully diff --git a/js/web/test/e2e/browser-test-wasm-path-override-filename-jsep.js b/js/web/test/e2e/browser-test-wasm-path-override-filename-jsep.js new file mode 100644 index 0000000000000..d325a5ca7187d --- /dev/null +++ b/js/web/test/e2e/browser-test-wasm-path-override-filename-jsep.js @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +'use strict'; + +it('Browser E2E testing - WebAssembly backend (path override filename)', async function () { + // check base URL port from test args + if (typeof __ort_arg_port === 'undefined') { + throw new Error('test flag --port= is required'); + } + const base = `http://localhost:${__ort_arg_port}/`; + + ort.env.wasm.wasmPaths = {}; + + if (typeof __ort_arg_files === 'string' && __ort_arg_files.includes('wasm')) { + const overrideWasmUrl = new URL('./test-wasm-path-override/jsep-renamed.wasm', base).href; + console.log(`ort.env.wasm.wasmPaths['wasm'] = ${JSON.stringify(overrideWasmUrl)};`); + ort.env.wasm.wasmPaths.wasm = overrideWasmUrl; + } + + if (typeof __ort_arg_files === 'string' && __ort_arg_files.includes('mjs')) { + const overrideMjsUrl = new URL('./test-wasm-path-override/jsep-renamed.mjs', base).href; + console.log(`ort.env.wasm.wasmPaths['mjs'] = ${JSON.stringify(overrideMjsUrl)};`); + ort.env.wasm.wasmPaths.mjs = overrideMjsUrl; + } + + await testFunction(ort, { executionProviders: ['wasm'] }); +}); diff --git a/js/web/test/e2e/run-data.js b/js/web/test/e2e/run-data.js index 04079b042bc23..dbc3ca0bd2460 100644 --- a/js/web/test/e2e/run-data.js +++ b/js/web/test/e2e/run-data.js @@ -14,7 +14,7 @@ const NODEJS_TEST_CASES = [ // [test_for_same_origin, test_for_cross_origin, main_js, ort_main_js, [test_args]] const BROWSER_TEST_CASES = [ // IIFE - [true, true, './browser-test-webgl.js', 'ort.min.js'], // webgl + [true, true, './browser-test-webgl.js', 'ort.all.min.js'], // webgl [true, true, './browser-test-webgl.js', 'ort.webgl.min.js'], // webgl [true, true, './browser-test-wasm.js', 'ort.wasm.min.js'], // wasm, ort.wasm [true, true, './browser-test-wasm-multi-session-create.js', 'ort.min.js'], // wasm, multi-session create @@ -24,7 +24,7 @@ const BROWSER_TEST_CASES = [ [true, true, './browser-test-wasm.js', 'ort.min.js', ['num_threads=1', 'proxy=1']], // wasm, 1 thread, proxy // ort.min.mjs - [true, true, './browser-test-webgl.js', 'ort.min.mjs'], // webgl + [true, true, './browser-test-webgl.js', 'ort.webgl.min.mjs'], // webgl [true, true, './browser-test-wasm.js', 'ort.min.mjs', ['num_threads=1']], // wasm, 1 thread [true, true, './browser-test-wasm.js', 'ort.min.mjs', ['num_threads=2']], // wasm, 2 threads [true, true, './browser-test-wasm.js', 'ort.min.mjs', ['num_threads=2', 'proxy=1']], // wasm, 2 threads, proxy @@ -41,22 +41,22 @@ const BROWSER_TEST_CASES = [ // path override: // wasm, path override filenames for both mjs and wasm, same origin - [true, false, 
'./browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=9876', 'files=mjs,wasm']], + [true, false, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=9876', 'files=mjs,wasm']], [true, false, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=9876', 'files=mjs,wasm']], // wasm, path override filenames for both mjs and wasm, cross origin - [false, true, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=8081', 'files=mjs,wasm']], + [false, true, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=8081', 'files=mjs,wasm']], [false, true, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=8081', 'files=mjs,wasm']], // wasm, path override filename for wasm, same origin - [true, false, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=9876', 'files=wasm']], + [true, false, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=9876', 'files=wasm']], [true, false, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=9876', 'files=wasm']], // wasm, path override filename for wasm, cross origin - [false, true, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=8081', 'files=wasm']], + [false, true, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=8081', 'files=wasm']], [false, true, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=8081', 'files=wasm']], // wasm, path override filename for mjs, same origin - [true, false, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=9876', 'files=mjs']], + [true, false, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=9876', 'files=mjs']], [true, false, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=9876', 'files=mjs']], // wasm, path override filename for mjs, cross origin - [false, true, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=8081', 'files=mjs']], + [false, true, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=8081', 'files=mjs']], [false, true, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=8081', 'files=mjs']], // wasm, path override prefix, same origin [true, false, './browser-test-wasm-path-override-prefix.js', 'ort.min.js', ['port=9876']], diff --git a/js/web/test/e2e/run.js b/js/web/test/e2e/run.js index 93f9d4a144bf2..3361bbece64ed 100644 --- a/js/web/test/e2e/run.js +++ b/js/web/test/e2e/run.js @@ -146,6 +146,10 @@ function prepareWasmPathOverrideFiles() { fs.copyFileSync(`${sourceFile}.wasm`, path.join(folder, 'ort-wasm-simd-threaded.wasm')); fs.copyFileSync(`${sourceFile}.mjs`, path.join(folder, 'renamed.mjs')); fs.copyFileSync(`${sourceFile}.wasm`, path.join(folder, 'renamed.wasm')); + fs.copyFileSync(`${sourceFile}.jsep.mjs`, path.join(folder, 'ort-wasm-simd-threaded.jsep.mjs')); + fs.copyFileSync(`${sourceFile}.jsep.wasm`, path.join(folder, 'ort-wasm-simd-threaded.jsep.wasm')); + fs.copyFileSync(`${sourceFile}.jsep.mjs`, path.join(folder, 'jsep-renamed.mjs')); + fs.copyFileSync(`${sourceFile}.jsep.wasm`, path.join(folder, 'jsep-renamed.wasm')); } async function testAllNodejsCases() { diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc index 45fb771ee13bb..f179756967d49 100644 --- a/js/web/test/suite-test-list.jsonc +++ b/js/web/test/suite-test-list.jsonc @@ -1365,6 +1365,7 @@ "gather.jsonc", 
"gather-block-quantized.jsonc", "gather-elements.jsonc", + "gather-nd.jsonc", "gemm.jsonc", "global-average-pool.jsonc", "greater.jsonc", diff --git a/objectivec/error_utils.mm b/objectivec/error_utils.mm index 335cf8894d549..e8d4d5bb365c9 100644 --- a/objectivec/error_utils.mm +++ b/objectivec/error_utils.mm @@ -11,7 +11,7 @@ void ORTSaveCodeAndDescriptionToError(int code, const char* descriptionCstr, NSE if (!error) return; NSString* description = [NSString stringWithCString:descriptionCstr - encoding:NSASCIIStringEncoding]; + encoding:NSUTF8StringEncoding]; *error = [NSError errorWithDomain:kOrtErrorDomain code:code diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc index 29f328264bf3f..31f95ee64df5d 100644 --- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include + #include "contrib_ops/webgpu/quantization/matmul_nbits.h" #include "contrib_ops/webgpu/webgpu_contrib_kernels.h" #include "core/providers/cpu/math/matmul_helper.h" @@ -352,8 +354,11 @@ Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context const uint32_t components_a = GetMaxComponents(K); const uint32_t components_b = GetMaxComponents(blob_size_in_words); uint32_t components = GetMaxComponents(N); - const bool is_intel = !std::strcmp(context.AdapterInfo().vendor, "intel") && !std::strcmp(context.AdapterInfo().architecture, "gen-12lp"); - const bool use_block32 = is_intel && block_size == 32; + + // Use block32 for Intel Gen12LP architecture. + const bool use_block32 = context.AdapterInfo().vendor == std::string_view{"intel"} && + context.AdapterInfo().architecture == std::string_view{"gen-12lp"} && + block_size == 32; const bool has_zero_points = zero_points != nullptr; // TODO: Support output_number > 1. Some cases are failed when output_number > 1. // const uint32_t output_number = M > 1 && (N / components) % 2 == 0 ? 
2 : 1; diff --git a/onnxruntime/core/providers/js/js_execution_provider.cc b/onnxruntime/core/providers/js/js_execution_provider.cc index c3c99c7d6855a..c1a8b373bed84 100644 --- a/onnxruntime/core/providers/js/js_execution_provider.cc +++ b/onnxruntime/core/providers/js/js_execution_provider.cc @@ -341,6 +341,10 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Gat class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, GatherElements); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, GatherElements); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 11, GatherND); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, 12, GatherND); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, GatherND); + class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 9, Slice); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, Slice); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Slice); @@ -667,6 +671,10 @@ std::unique_ptr RegisterKernels() { BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/js/operators/gather_nd.cc b/onnxruntime/core/providers/js/operators/gather_nd.cc new file mode 100644 index 0000000000000..ee69100cc658e --- /dev/null +++ b/onnxruntime/core/providers/js/operators/gather_nd.cc @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/js/js_kernel.h" +#include "core/providers/js/js_data_types.h" +#include "gather_nd.h" + +namespace onnxruntime { +namespace js { + +ONNX_OPERATOR_KERNEL_EX( + GatherND, + kOnnxDomain, + 13, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", JsepSupportedDataTypes()), + GatherND); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + GatherND, + kOnnxDomain, + 12, + 12, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", JsepSupportedDataTypes()), + GatherND); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + GatherND, + kOnnxDomain, + 11, + 11, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", JsepSupportedDataTypes()), + GatherND); + +} // namespace js +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/js/operators/gather_nd.h b/onnxruntime/core/providers/js/operators/gather_nd.h new file mode 100644 index 0000000000000..cdf7a52630dad --- /dev/null +++ b/onnxruntime/core/providers/js/operators/gather_nd.h @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
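// [Editor's note] A worked example of the GatherND semantics this kernel exposes,
// mirroring the gather-nd.jsonc test case added above: with batch_dims = 1, data
// of shape [2, 2, 2] holding [0, 1, 2, 3, 4, 5, 6, 7], and indices of shape
// [2, 1] holding [[1], [0]], each batch b gathers the slice data[b][indices[b][0]],
// so the output is [[2, 3], [4, 5]] with shape [2, 2].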
+
+#pragma once
+
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace js {
+
+class GatherND : public JsKernel {
+ public:
+  GatherND(const OpKernelInfo& info) : JsKernel(info) {
+    int64_t batchDims = info.GetAttrOrDefault("batch_dims", 0);
+
+    JSEP_INIT_KERNEL_ATTRIBUTE(GatherND, ({
+                                 "batch_dims" : Number($1),
+                               }),
+                               static_cast(batchDims));
+  }
+};
+
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index d979d53347c4f..1b432dad44263 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1726,8 +1726,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
   }
 
   trt_version_ = getInferLibVersion();
+  CUDA_CALL_THROW(cudaRuntimeGetVersion(&cuda_version_));
 
   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT version is " << trt_version_;
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] CUDA version is " << cuda_version_;
 
   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
                         << "device_id: " << device_id_
@@ -2466,13 +2468,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   // So, simply return the ComputeCapability here.
   if (graph.NumberOfNodes() == 1 && GraphHasCtxNode(graph)) {
     SubGraph_t supported_node_vector = {{0}, true};
-    std::unique_ptr sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph), 0);
+    std::unique_ptr sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_)), 0);
     result.push_back(ComputeCapability::Create(std::move(sub_graph)));
     return result;
   }
 
   // Generate unique kernel name for TRT graph
-  HashValue model_hash = TRTGenerateId(graph);
+  HashValue model_hash = TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_));
 
   // Get supported node list from TensorRT parser
   const int number_of_ort_nodes = graph.NumberOfNodes();
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 9e3a03417d917..d3e0b0fba8891 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -333,6 +333,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   // The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH
   int32_t trt_version_;
+  int32_t cuda_version_;
 
   // The OrtAllocator object will be obtained during EP compute time
   // and should be kept for the lifetime of the TRT EP object.
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
index 95abcd1bad2b8..5a7b135fd92cd 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
@@ -520,7 +520,7 @@ void RemoveCachesByType(const std::string& root, std::string file_extension) {
  * compiled kernels, so the name must be unique and deterministic across models and sessions.
* */ -HashValue TRTGenerateId(const GraphViewer& graph_viewer) { +HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version, std::string cuda_version) { HashValue model_hash = 0; // find the top level graph @@ -583,12 +583,11 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) { #endif #ifdef CUDA_VERSION - hash_str(std::to_string(CUDA_VERSION)); + hash_str(cuda_version); #endif #if defined(NV_TENSORRT_MAJOR) && defined(NV_TENSORRT_MINOR) - std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR); - hash_str(TRT_VERSION); + hash_str(trt_version); #endif model_hash = hash[0] | (uint64_t(hash[1]) << 32); diff --git a/onnxruntime/core/providers/webgpu/tensor/flatten.cc b/onnxruntime/core/providers/webgpu/tensor/flatten.cc new file mode 100644 index 0000000000000..81d28bd3c0fa7 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/flatten.cc @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/webgpu/tensor/flatten.h" +#include "core/providers/webgpu/webgpu_execution_provider.h" +#include "core/providers/webgpu/webgpu_supported_types.h" + +namespace onnxruntime { +namespace webgpu { + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 1, 8, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 9, 10, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 11, 12, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 13, 20, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_KERNEL_EX( + Flatten, + kOnnxDomain, + 21, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +} // namespace webgpu +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/webgpu/tensor/flatten.h b/onnxruntime/core/providers/webgpu/tensor/flatten.h new file mode 100644 index 0000000000000..5fc49a844b404 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/flatten.h @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
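// [Editor's note] Flatten reshapes an input of shape [d_0, ..., d_(n-1)] into the
// 2-D shape [d_0 * ... * d_(axis-1), d_axis * ... * d_(n-1)], after normalizing a
// negative axis by adding the input rank. For example (illustrative values only),
// a [2, 3, 4] input flattened at axis = 1 becomes [2, 12], and at axis = 0 it
// becomes [1, 24]. The kernel below implements this as a pure reshape and only
// copies data when the output buffer differs from the input buffer.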
+ +#pragma once + +#include "core/framework/op_kernel.h" +#include "core/providers/cpu/nn/flatten.h" +#include "core/framework/data_transfer_manager.h" + +namespace onnxruntime { +namespace webgpu { + +class Flatten final : public OpKernel { + public: + explicit Flatten(const OpKernelInfo& info) : OpKernel{info} { + axis_ = info.GetAttrOrDefault("axis", 1); + } + + Status Compute(OpKernelContext* context) const override { + const Tensor* input_tensor = context->Input(0); + const TensorShape& input_shape = input_tensor->Shape(); + int64_t input_rank = input_shape.NumDimensions(); + + // Handle negative axis + int64_t axis = axis_; + if (axis < 0) { + axis += input_rank; + } + + if (axis > input_rank) { + return Status(common::ONNXRUNTIME, common::FAIL, "Invalid value for axis, must be less than or equal to input_rank"); + } + + int64_t first_dim = 1; + for (int64_t i = 0; i < axis; i++) { + first_dim *= input_shape[i]; + } + + int64_t second_dim = 1; + for (int64_t i = axis; i < input_rank; i++) { + second_dim *= input_shape[i]; + } + + TensorShape output_shape({first_dim, second_dim}); + Tensor* output_tensor = context->Output(0, output_shape); + + const void* source = input_tensor->DataRaw(); + void* target = output_tensor->MutableDataRaw(); + // If source and target pointers are not equal (non-inplace operation), we need to copy the data. + if (target != source) { + ORT_RETURN_IF_ERROR(Info().GetDataTransferManager().CopyTensor(*input_tensor, *output_tensor)); + } + + return Status::OK(); + } + + private: + int64_t axis_; +}; + +} // namespace webgpu +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 36aab2e628a16..ea0cbddb0205d 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -58,16 +58,15 @@ void WebGpuContext::Initialize(const WebGpuExecutionProviderInfo& webgpu_ep_info adapter_toggles_desc.enabledToggleCount = enabled_adapter_toggles.size(); adapter_toggles_desc.enabledToggles = enabled_adapter_toggles.data(); - wgpu::RequestAdapterCallbackInfo req_adapter_callback_info = {}; - req_adapter_callback_info.mode = wgpu::CallbackMode::WaitAnyOnly; - req_adapter_callback_info.callback = [](WGPURequestAdapterStatus status, - WGPUAdapter adapter, const char* message, - void* userdata) { - ORT_ENFORCE(status == WGPURequestAdapterStatus_Success, "Failed to get a WebGPU adapter: ", message); - *static_cast(userdata) = wgpu::Adapter::Acquire(adapter); - }; - req_adapter_callback_info.userdata = &adapter_; - ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(instance_.RequestAdapter(&req_adapter_options, req_adapter_callback_info), UINT64_MAX)); + ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(instance_.RequestAdapter( + &req_adapter_options, + wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::RequestAdapterStatus status, wgpu::Adapter adapter, wgpu::StringView message, wgpu::Adapter* ptr) { + ORT_ENFORCE(status == wgpu::RequestAdapterStatus::Success, "Failed to get a WebGPU adapter: ", std::string_view{message}); + *ptr = adapter; + }, + &adapter_), + UINT64_MAX)); ORT_ENFORCE(adapter_ != nullptr, "Failed to get a WebGPU adapter."); } @@ -103,14 +102,15 @@ void WebGpuContext::Initialize(const WebGpuExecutionProviderInfo& webgpu_ep_info std::cerr << "WebGPU device lost (" << int(reason) << "): " << message; }); - wgpu::RequestDeviceCallbackInfo req_device_callback_info = {}; - 
req_device_callback_info.mode = wgpu::CallbackMode::WaitAnyOnly; - req_device_callback_info.callback = [](WGPURequestDeviceStatus status, WGPUDevice device, char const* message, void* userdata) { - ORT_ENFORCE(status == WGPURequestDeviceStatus_Success, "Failed to get a WebGPU device: ", message); - *static_cast(userdata) = wgpu::Device::Acquire(device); - }; - req_device_callback_info.userdata = &device_; - ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(adapter_.RequestDevice(&device_desc, req_device_callback_info), UINT64_MAX)); + ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(adapter_.RequestDevice( + &device_desc, + wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message, wgpu::Device* ptr) { + ORT_ENFORCE(status == wgpu::RequestDeviceStatus::Success, "Failed to get a WebGPU device: ", std::string_view{message}); + *ptr = device; + }, + &device_), + UINT64_MAX)); ORT_ENFORCE(device_ != nullptr, "Failed to get a WebGPU device."); } diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index f20c68ac0bfaf..66209adf6f1a9 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -347,7 +347,8 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, 8, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, 10, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 11, 12, Flatten); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Flatten); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, 20, Flatten); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 21, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 6, 12, Tile); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Tile); @@ -667,10 +668,12 @@ std::unique_ptr RegisterKernels() { // BuildKernelCreateInfo, // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc index 329db75316e82..52fcc39ae5418 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc @@ -311,12 +311,12 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N if (input_defs.size() >= 3) { x_zero_point = model_builder.GetOperand(node.InputDefs()[2]->Name()); } else { - x_zero_point = model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8); + x_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0); } if (input_defs.size() >= 4) { w_zero_point = model_builder.GetOperand(node.InputDefs()[3]->Name()); } else { - w_zero_point = 
model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
+      w_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0);
     }
     output = model_builder.GetBuilder().call("conv2dInteger",
                                              input, x_zero_point, filter, w_zero_point, options);
diff --git a/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc
index 5434194a214ac..9bb930c63b009 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc
@@ -59,22 +59,14 @@ Status DropoutOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
     std::vector mask_shape;
     ORT_RETURN_IF_NOT(GetShape(*output_defs[1], mask_shape, logger), "Cannot get mask output's shape");
     std::vector dims = GetVecUint32FromVecInt64(mask_shape);
-
-    emscripten::val desc = emscripten::val::object();
-    desc.set("dataType", "uint8");
-    desc.set("dimensions", emscripten::val::array(dims));
-    desc.set("shape", emscripten::val::array(dims));
-    const auto num_elements = narrow(Product(mask_shape));
-    emscripten::val ones_buffer = emscripten::val::global("Uint8Array").new_(num_elements);
-    ones_buffer.call("fill", 1);
-
-    emscripten::val mask_output = model_builder.GetBuilder().call("constant", desc, ones_buffer);
+    emscripten::val one_constant = model_builder.CreateOrGetConstant(
+        ONNX_NAMESPACE::TensorProto_DataType_BOOL, 1, dims);
 
     emscripten::val options = emscripten::val::object();
     options.set("label", output_defs[1]->Name() + "_identity");
     // Add additional identity op in case the mask is the output of a WebNN graph,
     // because WebNN does not support a constant operand as output.
-    mask_output = model_builder.GetBuilder().call("identity", mask_output, options);
+    emscripten::val mask_output = model_builder.GetBuilder().call("identity", one_constant, options);
     model_builder.AddOperand(output_defs[1]->Name(), std::move(mask_output));
   }
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
index 1477530ce1894..252d49a2f4d4d 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
@@ -113,12 +113,12 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
     if (input_defs.size() >= 3) {
       a_zero_point = model_builder.GetOperand(node.InputDefs()[2]->Name());
     } else {
-      a_zero_point = model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
+      a_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0);
     }
     if (input_defs.size() >= 4) {
       b_zero_point = model_builder.GetOperand(node.InputDefs()[3]->Name());
     } else {
-      b_zero_point = model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
+      b_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0);
     }
     output = model_builder.GetBuilder().call("matmulInteger",
                                              a,
diff --git a/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc
index bdd1283c720f3..19f6d6aff8f97 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc
@@ -29,7 +29,8 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder&
model_builder, const Node& node, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); - const auto input_data_type = input_defs[0]->TypeAsProto()->tensor_type().elem_type(); + int32_t input_data_type; + ORT_RETURN_IF_NOT(GetType(*input_defs[0], input_data_type, logger), "Cannot get input type"); emscripten::val input = model_builder.GetOperand(input_defs[0]->Name()); const auto node_name = node.Name(); emscripten::val wnn_builder = model_builder.GetBuilder(); @@ -42,10 +43,10 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, // Prepare WebNN constants for alpha, beta, bias attributes. // Assume T is float, because input_data_type has been limited to float32 and float16 in 'hasSupportedInitsImpl'. - emscripten::val alpha_constant = model_builder.CreateOrGetScalarConstant(input_data_type, alpha); - emscripten::val beta_constant = model_builder.CreateOrGetScalarConstant(input_data_type, beta); - emscripten::val bias_constant = model_builder.CreateOrGetScalarConstant(input_data_type, bias); - emscripten::val pow1_constant = model_builder.CreateOrGetScalarConstant(input_data_type, 2); + emscripten::val alpha_constant = model_builder.CreateOrGetConstant(input_data_type, alpha); + emscripten::val beta_constant = model_builder.CreateOrGetConstant(input_data_type, beta); + emscripten::val bias_constant = model_builder.CreateOrGetConstant(input_data_type, bias); + emscripten::val pow1_constant = model_builder.CreateOrGetConstant(input_data_type, 2); /** WebNN doesn't support LRN. So decompose it into a series of ops: diff --git a/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc index fa82c2f85f0d8..79ed0393e3044 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc @@ -100,7 +100,7 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder X --> Pow --> ReduceMean --> Add --> Sqrt --> Div -> Mul ^ ^ ^ ^ ^ | | | | | - Y:2 axis B:epsilon A:X A:scale + Y:2 axis B:epsilon A:X A:scale */ int32_t input_type; @@ -108,13 +108,7 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder emscripten::val common_options = emscripten::val::object(); // Pow - emscripten::val pow_constant_desc = emscripten::val::object(); - ORT_RETURN_IF_NOT(SetWebnnDataType(pow_constant_desc, input_type), "Unsupported data type"); - pow_constant_desc.set("shape", emscripten::val::array()); - emscripten::val pow_buffer = emscripten::val::global("Float32Array").new_(1); - pow_buffer.set(0, 2); - emscripten::val pow_constant = - model_builder.GetBuilder().call("constant", pow_constant_desc, pow_buffer); + emscripten::val pow_constant = model_builder.CreateOrGetConstant(input_type, 2); common_options.set("label", node.Name() + "_pow"); emscripten::val pow = model_builder.GetBuilder().call("pow", input, pow_constant, common_options); @@ -127,13 +121,7 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder emscripten::val reduce_mean = model_builder.GetBuilder().call("reduceMean", pow, reduce_options); // Add - emscripten::val add_constant_desc = emscripten::val::object(); - ORT_RETURN_IF_NOT(SetWebnnDataType(add_constant_desc, input_type), "Unsupported data type"); - add_constant_desc.set("shape", emscripten::val::array()); - emscripten::val add_buffer = 
emscripten::val::global("Float32Array").new_(1); - add_buffer.set(0, epsilon); - emscripten::val add_constant = - model_builder.GetBuilder().call("constant", add_constant_desc, add_buffer); + emscripten::val add_constant = model_builder.CreateOrGetConstant(input_type, epsilon); common_options.set("label", node.Name() + "_add"); emscripten::val add = model_builder.GetBuilder().call("add", reduce_mean, add_constant, common_options); diff --git a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc index 88fb79b146cd9..ca15e123d0999 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc @@ -100,7 +100,10 @@ Status QDQOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, // zero_point has the same shape as the scale tensor. zero_point_shape = GetVecUint32FromVecInt64(scale_shape); } - zero_point = model_builder.GetZeroConstant(zero_point_type, zero_point_shape); + // Create a zero constant with the same shape as the scale tensor. + // The zero value has been pre-processed in the CreateOrGetConstant function, + // so the type of T is not relevant here. + zero_point = model_builder.CreateOrGetConstant(zero_point_type, 0, zero_point_shape); } emscripten::val options = emscripten::val::object(); diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index 8a82fce42189d..e8f116d390199 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -14,7 +14,6 @@ #include "core/providers/common.h" #include "core/providers/shared/utils/utils.h" -#include #include namespace onnxruntime { @@ -385,73 +384,6 @@ void ModelBuilder::AddOperand(const std::string& name, const emscripten::val& op wnn_operands_.insert(std::make_pair(name, operand)); } -// Get the zero constant with shape. -const emscripten::val& ModelBuilder::GetZeroConstant(const int32_t& data_type, - const std::vector& shape) { - std::string name = "webnn_zero_constant_" + std::to_string(data_type); - emscripten::val dims = emscripten::val::array(); - if (!shape.empty()) { - dims = emscripten::val::array(shape); - std::ostringstream name_stream; - name_stream << name; - for (const auto& dim : shape) { - name_stream << "_" << dim; - } - name = name_stream.str(); - } - // If the operand does not exist, create it. - if (wnn_operands_.find(name) == wnn_operands_.end()) { - emscripten::val desc = emscripten::val::object(); - desc.set("dimensions", dims); - desc.set("shape", dims); - emscripten::val zero_buffer = emscripten::val::undefined(); - if (!SetWebnnDataType(desc, data_type)) { - ORT_THROW("Unsupported data type: " + std::to_string(data_type)); - } - auto num_elements = Product(shape); - switch (data_type) { - case ONNX_NAMESPACE::TensorProto_DataType_INT4: - case ONNX_NAMESPACE::TensorProto_DataType_UINT4: - // For WebNN int4 and uint4 tensors are stored in Uint8Array, - // so we need to adjust the number of elements. 
-        num_elements = (num_elements + 1) / 2;
-        zero_buffer = emscripten::val::global("Uint8Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
-        zero_buffer = emscripten::val::global("Uint8Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT8:
-        zero_buffer = emscripten::val::global("Int8Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-        zero_buffer = emscripten::val::global("Uint16Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-        zero_buffer = emscripten::val::global("Float32Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-        zero_buffer = emscripten::val::global("Int32Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-        zero_buffer = emscripten::val::global("BigInt64Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
-        zero_buffer = emscripten::val::global("Uint32Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
-        zero_buffer = emscripten::val::global("BigUint64Array").new_(num_elements);
-        break;
-      default:
-        break;
-    }
-
-    emscripten::val zero_constant = wnn_builder_.call("constant", desc, zero_buffer);
-    wnn_operands_.insert(std::make_pair(name, zero_constant));
-  }
-  return wnn_operands_.at(name);
-}
-
 void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
   skipped_initializers_.insert(tensor_name);
 }
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h
index c482e9d05b301..0fc2fa20670c7 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -11,6 +11,7 @@
 #include "core/framework/execution_provider.h"
 #include "core/providers/webnn/builders/helper.h"
 
+#include <sstream>
 #include
 #include
 
@@ -38,11 +39,10 @@ class ModelBuilder {
   const emscripten::val& GetOpSupportLimits() const { return wnn_limits_; }
 
   void AddOperand(const std::string& name, const emscripten::val& operand);
-  const emscripten::val& GetZeroConstant(
-      const int32_t& data_type, const std::vector& shape = {});
 
   template
-  const emscripten::val& CreateOrGetScalarConstant(const int32_t& data_type, T value);
+  const emscripten::val& CreateOrGetConstant(const int32_t& data_type, T value,
+                                             const std::vector& shape = {});
 
   // Use the buffers to persist WebNN allocated data like transposed weight.
   // It ensures their validity during the inference session.
@@ -103,11 +103,12 @@ class ModelBuilder {
   static const IOpBuilder* GetOpBuilder(const Node& node);
 };
 
-// Create a scalar constant MLOperand of the specified value and data type.
-// Workaround for the builder.constant(type, value) method since it has not been implemented yet.
+// Create or retrieve one of the following:
+// - A WebNN constant MLOperand filled with the specified value, data type, and shape.
+// - A WebNN scalar constant MLOperand with the specified value and data type.
+// For a scalar constant, this is a workaround for the builder.constant(type, value) method,
+// since it has not been implemented yet.
 // https://webmachinelearning.github.io/webnn/#api-mlgraphbuilder-constant-type-value
-// BTW, the spec is discussing if the builder.constant(type, value) should be dropped at
-// https://github.com/webmachinelearning/webnn/issues/475. Fix me according to the spec decision.
// // This function enforces a mapping between the data_type and the value types: // - TensorProto_DataType_INT4 <-> int8_t @@ -122,69 +123,96 @@ class ModelBuilder { // - TensorProto_DataType_UINT32 <-> uint32_t // - TensorProto_DataType_UINT64 <-> uint64_t template -const emscripten::val& ModelBuilder::CreateOrGetScalarConstant(const int32_t& data_type, T value) { - std::string name = "webnn_scalar_constant_" + std::to_string(data_type) + "_" + std::to_string(value); - emscripten::val desc = emscripten::val::object(); - desc.set("shape", emscripten::val::array()); - emscripten::val scalar_buffer = emscripten::val::undefined(); - uint16_t value_uint16 = 0; - uint8_t value_uint8 = 0; - if (!SetWebnnDataType(desc, data_type)) { - ORT_THROW("Unsupported data type: " + std::to_string(data_type)); +const emscripten::val& ModelBuilder::CreateOrGetConstant(const int32_t& data_type, T value, + const std::vector& shape) { + std::string name = "webnn_constant_" + std::to_string(data_type) + "_" + std::to_string(value); + emscripten::val dims = emscripten::val::array(); + if (!shape.empty()) { + dims = emscripten::val::array(shape); + std::ostringstream name_stream; + name_stream << name; + for (const auto& dim : shape) { + name_stream << "_" << dim; + } + name = name_stream.str(); } // If the operand does not exist, create it. if (wnn_operands_.find(name) == wnn_operands_.end()) { + emscripten::val desc = emscripten::val::object(); + desc.set("shape", dims); + desc.set("dimensions", dims); + emscripten::val buffer = emscripten::val::undefined(); + if (!SetWebnnDataType(desc, data_type)) { + ORT_THROW("Unsupported data type: " + std::to_string(data_type)); + } + auto num_elements = Product(shape); switch (data_type) { case ONNX_NAMESPACE::TensorProto_DataType_INT4: case ONNX_NAMESPACE::TensorProto_DataType_UINT4: - scalar_buffer = emscripten::val::global("Uint8Array").new_(1); - value_uint8 = PackInt8ToUint8AsNibble(value, data_type); - scalar_buffer.call("fill", emscripten::val(value_uint8)); + // For WebNN, int4 and uint4 tensors are stored in a Uint8Array, + // so we need to adjust the number of elements. + num_elements = (num_elements + 1) / 2; + buffer = emscripten::val::global("Uint8Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(PackInt8ToUint8AsNibble(value, data_type))); + } break; case ONNX_NAMESPACE::TensorProto_DataType_BOOL: - scalar_buffer = emscripten::val::global("Uint8Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value ? 
1 : 0)); - break; case ONNX_NAMESPACE::TensorProto_DataType_UINT8: - scalar_buffer = emscripten::val::global("Uint8Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Uint8Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_INT8: - scalar_buffer = emscripten::val::global("Int8Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Int8Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: - scalar_buffer = emscripten::val::global("Uint16Array").new_(1); - value_uint16 = PackFloat32ToUint16AsFloat16(value); - scalar_buffer.call("fill", emscripten::val(value_uint16)); + buffer = emscripten::val::global("Uint16Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(PackFloat32ToUint16AsFloat16(value))); + } break; case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: - scalar_buffer = emscripten::val::global("Float32Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Float32Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_INT32: - scalar_buffer = emscripten::val::global("Int32Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Int32Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_UINT32: - scalar_buffer = emscripten::val::global("Uint32Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Uint32Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_INT64: - scalar_buffer = emscripten::val::global("BigInt64Array").new_(1); - scalar_buffer.call("fill", emscripten::val::global("BigInt")(value)); + buffer = emscripten::val::global("BigInt64Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val::global("BigInt")(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_UINT64: - scalar_buffer = emscripten::val::global("BigUint64Array").new_(1); - scalar_buffer.call("fill", emscripten::val::global("BigInt")(value)); + buffer = emscripten::val::global("BigUint64Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val::global("BigInt")(value)); + } break; default: break; } - const emscripten::val scalar_constant = wnn_builder_.call("constant", desc, scalar_buffer); - wnn_operands_.insert(std::make_pair(name, scalar_constant)); + const emscripten::val constant = wnn_builder_.call("constant", desc, buffer); + wnn_operands_.insert(std::make_pair(name, constant)); } return wnn_operands_.at(name); diff --git a/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp b/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp index a455007c2f6ae..b598c20e29280 100644 --- a/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp +++ b/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp @@ -81,7 +81,6 @@ class MlasNeonFp16CastTest : public MlasTestBase { class MlasNeonFp16PrepackTest : public MlasTestBase { private: - std::random_device rd_; // a seed source for the random number engine unsigned int seed_; 
std::mt19937 gen_; // mersenne_twister_engine seeded with rd() std::uniform_int_distribution<> distrib_; @@ -173,7 +172,7 @@ class MlasNeonFp16PrepackTest : public MlasTestBase { public: MlasNeonFp16PrepackTest() - : seed_(rd_()), gen_(seed_), distrib_(0, 255) { + : seed_(19287), gen_(seed_), distrib_(0, 255) { } static const char* GetTestSuiteName() { @@ -197,7 +196,6 @@ class MlasNeonFp16PrepackTest : public MlasTestBase { class MlasNeonFp16DequantBTest : public MlasTestBase { private: - std::random_device rd_; // a seed source for the random number engine unsigned int seed_; std::mt19937 gen_; // mersenne_twister_engine seeded with rd() std::uniform_int_distribution<> distrib_; @@ -318,7 +316,7 @@ class MlasNeonFp16DequantBTest : public MlasTestBase { public: MlasNeonFp16DequantBTest() - : seed_(rd_()), gen_(seed_), distrib_(0, 255), _distribFp(0.5f, 2.0f) { + : seed_(19287), gen_(seed_), distrib_(0, 255), _distribFp(0.5f, 2.0f) { } static const char* GetTestSuiteName() { @@ -353,7 +351,6 @@ class MlasNeonFp16DequantBTest : public MlasTestBase { class MlasNeonFp16HQ4BitGemmKernelTest : public MlasTestBase { private: - std::random_device rd_; // a seed source for the random number engine unsigned int seed_; std::mt19937 gen_; // mersenne_twister_engine seeded with rd() MatrixGuardBuffer A_, B_, C_, ref_, bias_; @@ -404,7 +401,7 @@ class MlasNeonFp16HQ4BitGemmKernelTest : public MlasTestBase { for (size_t m = 0; m < M; ++m) { for (size_t n = 0; n < N; ++n) { size_t i = m * Ldc + n; - ASSERT_TRUE(FloatEqual(target[i], ref[i], 0.015f, 0.03f)) + ASSERT_TRUE(FloatEqual(target[i], ref[i], 0.02f, 0.055f)) << " seed " << seed_ << " v0 " << target[i] << " v1 " << ref[i] << " m " << m << " n " << n; @@ -439,7 +436,7 @@ class MlasNeonFp16HQ4BitGemmKernelTest : public MlasTestBase { public: MlasNeonFp16HQ4BitGemmKernelTest() - : seed_(rd_()), gen_(seed_) { + : seed_(19287), gen_(seed_) { } static const char* GetTestSuiteName() { diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 63327a028c6f4..0022d7fc0e184 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -342,8 +342,12 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { Graph& graph = model->MainGraph(); GraphViewer viewer(graph); + std::string trt_version = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR); + std::string cuda_version = std::to_string(CUDA_VERSION); + std::string ort_version = ORT_VERSION; + // get the hash for the model when loaded from file - HashValue model_hash = TRTGenerateId(viewer); + HashValue model_hash = TRTGenerateId(viewer, trt_version, cuda_version); ASSERT_NE(model_hash, 0); // now load the model from bytes and check the hash differs @@ -358,7 +362,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { // Test loading same model from file and byte steam. 
Hash values should be different Graph& graph2 = model2->MainGraph(); GraphViewer viewer2(graph2); - HashValue model_hash2 = TRTGenerateId(viewer2); + HashValue model_hash2 = TRTGenerateId(viewer2, trt_version, cuda_version); ASSERT_NE(model_hash, model_hash2); // Test loading same model from different path, see if hash values are same as well @@ -367,7 +371,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { ASSERT_TRUE(Model::Load(model_path, model3, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK()); Graph& graph3 = model3->MainGraph(); GraphViewer viewer3(graph3); - HashValue model_hash3 = TRTGenerateId(viewer3); + HashValue model_hash3 = TRTGenerateId(viewer3, trt_version, cuda_version); ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded"; } diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index 8fc76da3495a8..a274b90dc042f 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -105,7 +105,7 @@ def load_jsonc(basename: str): return json.loads("\n".join(lines)) -def create_backend_test(test_name=None): +def create_backend_test(devices: list[str], test_name=None): """Creates an OrtBackendTest and adds its TestCase's to global scope so unittest will find them.""" overrides = load_jsonc("onnx_backend_test_series_overrides.jsonc") @@ -126,30 +126,29 @@ def create_backend_test(test_name=None): else: filters = load_jsonc("onnx_backend_test_series_filters.jsonc") current_failing_tests = apply_filters(filters, "current_failing_tests") - if platform.architecture()[0] == "32bit": current_failing_tests += apply_filters(filters, "current_failing_tests_x86") - if backend.supports_device("DNNL"): + if backend.supports_device("DNNL") or "DNNL" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_DNNL") - if backend.supports_device("NNAPI"): + if backend.supports_device("NNAPI") or "NNAPI" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_NNAPI") - if backend.supports_device("OPENVINO_GPU"): + if backend.supports_device("OPENVINO_GPU") or "OPENVINO_GPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_GPU") - if backend.supports_device("OPENVINO_CPU"): + if backend.supports_device("OPENVINO_CPU") or "OPENVINO_CPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP32") current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16") - if backend.supports_device("OPENVINO_NPU"): + if backend.supports_device("OPENVINO_NPU") or "OPENVINO_NPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU") - if backend.supports_device("OPENVINO"): + if backend.supports_device("OPENVINO") or "OPENVINO" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18") - if backend.supports_device("MIGRAPHX"): + if backend.supports_device("MIGRAPHX") or "MIGRAPHX" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_MIGRAPHX") if backend.supports_device("WEBGPU"): @@ -158,8 +157,16 @@ def create_backend_test(test_name=None): # Skip these tests for a "pure" DML onnxruntime python wheel. 
We keep these tests enabled for instances where both DML and CUDA # EPs are available (Windows GPU CI pipeline has this config) - these test will pass because CUDA has higher precedence than DML # and the nodes are assigned to only the CUDA EP (which supports these tests) - if backend.supports_device("DML") and not backend.supports_device("GPU"): + if (backend.supports_device("DML") and not backend.supports_device("GPU")) or "DML" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_pure_DML") + # exclude CUDA EP when DML test is running. + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,CUDAExecutionProvider" + elif backend.supports_device("DML") and "DML" not in devices: + # exclude DML EP when CUDA test is running. + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,DmlExecutionProvider" + else: + # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" filters = ( current_failing_tests @@ -172,9 +179,6 @@ def create_backend_test(test_name=None): backend_test.exclude("(" + "|".join(filters) + ")") print("excluded tests:", filters) - # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior - os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" - # import all test cases at global scope to make # them visible to python.unittest. globals().update(backend_test.enable_report().test_cases) @@ -199,6 +203,15 @@ def parse_args(): help="Only run tests that match this value. Matching is regex based, and '.*' is automatically appended", ) + parser.add_argument( + "--devices", + type=str, + choices=["CPU", "CUDA", "MIGRAPHX", "DNNL", "DML", "OPENVINO_GPU", "OPENVINO_CPU", "OPENVINO_NPU", "OPENVINO"], + nargs="+", # allows multiple values + default=["CPU"], # default to ["CPU"] if no input is given + help="Select one or more devices CPU, CUDA, MIGRAPHX, DNNL, DML, OPENVINO_GPU, OPENVINO_CPU, OPENVINO_NPU, OPENVINO", + ) + # parse just our args. 
python unittest has its own args and arg parsing, and that runs inside unittest.main() parsed, unknown = parser.parse_known_args() sys.argv = sys.argv[:1] + unknown @@ -209,5 +222,5 @@ if __name__ == "__main__": args = parse_args() - create_backend_test(args.test_name) + create_backend_test(args.devices, args.test_name) unittest.main() diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index f083ab14ad133..7ecaab6fedb02 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -750,6 +750,13 @@ "^test_reduce_log_sum_empty_set_cpu", "^test_reduce_log_sum_exp_empty_set_cpu", "^test_reduce_prod_empty_set_cpu", + // Bug: DML EP somehow executes these CUDA tests and fails + // TODO: Remove these tests when DML EP is fixed + "^test_convtranspose_autopad_same_cuda", + "^test_asin_example_cuda", + "^test_dynamicquantizelinear_cuda", + "^test_dynamicquantizelinear_expanded_cuda", + "^test_reduce_min_empty_set_cuda", //Bug: DML EP does not execute operators with an empty input tensor //TODO: Resolve as a graph implementation that returns a constant inf tensor with appropriate strides "^test_reduce_min_empty_set_cpu" diff --git a/onnxruntime/test/wasm/package-lock.json b/onnxruntime/test/wasm/package-lock.json index 522e96fc3188a..3bd5d173dbe79 100644 --- a/onnxruntime/test/wasm/package-lock.json +++ b/onnxruntime/test/wasm/package-lock.json @@ -27,9 +27,9 @@ } }, "node_modules/@socket.io/component-emitter": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz", - "integrity": "sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz", + "integrity": "sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==", "dev": true }, "node_modules/@types/cookie": { @@ -39,19 +39,22 @@ "dev": true }, "node_modules/@types/cors": { - "version": "2.8.13", - "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.13.tgz", - "integrity": "sha512-RG8AStHlUiV5ysZQKq97copd2UmVYw3/pRMLefISZ3S1hK104Cwm7iLQ3fTKx+lsUH2CE8FlLaYeEA2LSeqYUA==", + "version": "2.8.17", + "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz", + "integrity": "sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==", "dev": true, "dependencies": { "@types/node": "*" } }, "node_modules/@types/node": { - "version": "18.13.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.13.0.tgz", - "integrity": "sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg==", - "dev": true + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "dev": true, + "dependencies": { + "undici-types": "~6.20.0" + } }, "node_modules/accepts": { "version": "1.3.8", @@ -162,12 +165,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": 
"3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -288,9 +291,9 @@ } }, "node_modules/cookie": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.2.tgz", - "integrity": "sha512-aSWTXFzaKWkvHO1Ny/s+ePFpvKsPnjc551iI41v3ny/ow6tBG5Vd+FuqGNhh1LxOmVzOlGUriIlOaokOvhaStA==", + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", "dev": true, "engines": { "node": ">= 0.6" @@ -409,9 +412,9 @@ } }, "node_modules/engine.io": { - "version": "6.4.2", - "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.4.2.tgz", - "integrity": "sha512-FKn/3oMiJjrOEOeUub2WCox6JhxBXq/Zn3fZOMCBxKnNYtsdKjxhl7yR3fZhM9PV+rdE75SU5SYMc+2PGzo+Tg==", + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.6.2.tgz", + "integrity": "sha512-gmNvsYi9C8iErnZdVcJnvCpSKbWTt1E8+JZo8b+daLninywUWi5NQ5STSHZ9rFjFO7imNcvb8Pc5pe/wMR5xEw==", "dev": true, "dependencies": { "@types/cookie": "^0.4.1", @@ -419,32 +422,32 @@ "@types/node": ">=10.0.0", "accepts": "~1.3.4", "base64id": "2.0.0", - "cookie": "~0.4.1", + "cookie": "~0.7.2", "cors": "~2.8.5", "debug": "~4.3.1", - "engine.io-parser": "~5.0.3", - "ws": "~8.11.0" + "engine.io-parser": "~5.2.1", + "ws": "~8.17.1" }, "engines": { - "node": ">=10.0.0" + "node": ">=10.2.0" } }, "node_modules/engine.io-parser": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.0.6.tgz", - "integrity": "sha512-tjuoZDMAdEhVnSFleYPCtdL2GXwVTGtNjoeJd9IhIG3C1xs9uwxqRNEu5WpnDZCaozwVlK/nuQhpodhXSIMaxw==", + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.2.3.tgz", + "integrity": "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q==", "dev": true, "engines": { "node": ">=10.0.0" } }, "node_modules/engine.io/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -456,9 +459,9 @@ } }, "node_modules/engine.io/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, "node_modules/ent": { @@ -516,9 +519,9 @@ "dev": true }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + 
"version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -1304,35 +1307,60 @@ } }, "node_modules/socket.io": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.6.0.tgz", - "integrity": "sha512-b65bp6INPk/BMMrIgVvX12x3Q+NqlGqSlTuvKQWt0BUJ3Hyy3JangBl7fEoWZTXbOKlCqNPbQ6MbWgok/km28w==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.8.1.tgz", + "integrity": "sha512-oZ7iUCxph8WYRHHcjBEc9unw3adt5CmSNlppj/5Q4k2RIrhl8Z5yY2Xr4j9zj0+wzVZ0bxmYoGSzKJnRl6A4yg==", "dev": true, "dependencies": { "accepts": "~1.3.4", "base64id": "~2.0.0", + "cors": "~2.8.5", "debug": "~4.3.2", - "engine.io": "~6.4.0", + "engine.io": "~6.6.0", "socket.io-adapter": "~2.5.2", - "socket.io-parser": "~4.2.1" + "socket.io-parser": "~4.2.4" }, "engines": { - "node": ">=10.0.0" + "node": ">=10.2.0" } }, "node_modules/socket.io-adapter": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.2.tgz", - "integrity": "sha512-87C3LO/NOMc+eMcpcxUBebGjkpMDkNBS9tf7KJqcDsmL936EChtVva71Dw2q4tQcuVC+hAUy4an2NO/sYXmwRA==", + "version": "2.5.5", + "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.5.tgz", + "integrity": "sha512-eLDQas5dzPgOWCk9GuuJC2lBqItuhKI4uxGgo9aIV7MYbk2h9Q6uULEh8WBzThoI7l+qU9Ast9fVUmkqPP9wYg==", + "dev": true, + "dependencies": { + "debug": "~4.3.4", + "ws": "~8.17.1" + } + }, + "node_modules/socket.io-adapter/node_modules/debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ws": "~8.11.0" + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } } }, + "node_modules/socket.io-adapter/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, "node_modules/socket.io-parser": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.3.tgz", - "integrity": "sha512-JMafRntWVO2DCJimKsRTh/wnqVvO4hrfwOqtO7f+uzwsQMuxO6VwImtYxaQ+ieoyshWOTJyV0fA21lccEXRPpQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.4.tgz", + "integrity": "sha512-/GbIKmo8ioc+NIWIhwdecY0ge+qVBSMdgxGygevmdHj24bsfgtCmcUUcQ5ZzcylGFHsN3k4HB4Cgkl96KVnuew==", "dev": true, "dependencies": { "@socket.io/component-emitter": "~3.1.0", @@ -1343,12 +1371,12 @@ } }, "node_modules/socket.io-parser/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -1360,9 +1388,9 @@ } }, 
"node_modules/socket.io-parser/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, "node_modules/socket.io/node_modules/debug": { @@ -1534,6 +1562,12 @@ "node": "*" } }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true + }, "node_modules/universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", @@ -1615,16 +1649,16 @@ "dev": true }, "node_modules/ws": { - "version": "8.11.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.11.0.tgz", - "integrity": "sha512-HPG3wQd9sNQoT9xHyNCXoDUa+Xw/VevmY9FoHyQ+g+rrMn4j6FB4np7Z0OhdTgjx6MgQLK7jwSy1YecU1+4Asg==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", "dev": true, "engines": { "node": ">=10.0.0" }, "peerDependencies": { "bufferutil": "^4.0.1", - "utf-8-validate": "^5.0.2" + "utf-8-validate": ">=5.0.2" }, "peerDependenciesMeta": { "bufferutil": { @@ -1686,9 +1720,9 @@ "dev": true }, "@socket.io/component-emitter": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz", - "integrity": "sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz", + "integrity": "sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==", "dev": true }, "@types/cookie": { @@ -1698,19 +1732,22 @@ "dev": true }, "@types/cors": { - "version": "2.8.13", - "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.13.tgz", - "integrity": "sha512-RG8AStHlUiV5ysZQKq97copd2UmVYw3/pRMLefISZ3S1hK104Cwm7iLQ3fTKx+lsUH2CE8FlLaYeEA2LSeqYUA==", + "version": "2.8.17", + "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz", + "integrity": "sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==", "dev": true, "requires": { "@types/node": "*" } }, "@types/node": { - "version": "18.13.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.13.0.tgz", - "integrity": "sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg==", - "dev": true + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "dev": true, + "requires": { + "undici-types": "~6.20.0" + } }, "accepts": { "version": "1.3.8", @@ -1796,12 +1833,12 @@ } }, "braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + 
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "requires": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" } }, "bytes": { @@ -1890,9 +1927,9 @@ "dev": true }, "cookie": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.2.tgz", - "integrity": "sha512-aSWTXFzaKWkvHO1Ny/s+ePFpvKsPnjc551iI41v3ny/ow6tBG5Vd+FuqGNhh1LxOmVzOlGUriIlOaokOvhaStA==", + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", "dev": true }, "cors": { @@ -1986,9 +2023,9 @@ "dev": true }, "engine.io": { - "version": "6.4.2", - "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.4.2.tgz", - "integrity": "sha512-FKn/3oMiJjrOEOeUub2WCox6JhxBXq/Zn3fZOMCBxKnNYtsdKjxhl7yR3fZhM9PV+rdE75SU5SYMc+2PGzo+Tg==", + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.6.2.tgz", + "integrity": "sha512-gmNvsYi9C8iErnZdVcJnvCpSKbWTt1E8+JZo8b+daLninywUWi5NQ5STSHZ9rFjFO7imNcvb8Pc5pe/wMR5xEw==", "dev": true, "requires": { "@types/cookie": "^0.4.1", @@ -1996,34 +2033,34 @@ "@types/node": ">=10.0.0", "accepts": "~1.3.4", "base64id": "2.0.0", - "cookie": "~0.4.1", + "cookie": "~0.7.2", "cors": "~2.8.5", "debug": "~4.3.1", - "engine.io-parser": "~5.0.3", - "ws": "~8.11.0" + "engine.io-parser": "~5.2.1", + "ws": "~8.17.1" }, "dependencies": { "debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "requires": { - "ms": "2.1.2" + "ms": "^2.1.3" } }, "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true } } }, "engine.io-parser": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.0.6.tgz", - "integrity": "sha512-tjuoZDMAdEhVnSFleYPCtdL2GXwVTGtNjoeJd9IhIG3C1xs9uwxqRNEu5WpnDZCaozwVlK/nuQhpodhXSIMaxw==", + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.2.3.tgz", + "integrity": "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q==", "dev": true }, "ent": { @@ -2072,9 +2109,9 @@ "dev": true }, "fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "requires": { "to-regex-range": "^5.0.1" @@ -2651,17 +2688,18 @@ } }, 
"socket.io": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.6.0.tgz", - "integrity": "sha512-b65bp6INPk/BMMrIgVvX12x3Q+NqlGqSlTuvKQWt0BUJ3Hyy3JangBl7fEoWZTXbOKlCqNPbQ6MbWgok/km28w==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.8.1.tgz", + "integrity": "sha512-oZ7iUCxph8WYRHHcjBEc9unw3adt5CmSNlppj/5Q4k2RIrhl8Z5yY2Xr4j9zj0+wzVZ0bxmYoGSzKJnRl6A4yg==", "dev": true, "requires": { "accepts": "~1.3.4", "base64id": "~2.0.0", + "cors": "~2.8.5", "debug": "~4.3.2", - "engine.io": "~6.4.0", + "engine.io": "~6.6.0", "socket.io-adapter": "~2.5.2", - "socket.io-parser": "~4.2.1" + "socket.io-parser": "~4.2.4" }, "dependencies": { "debug": { @@ -2682,18 +2720,36 @@ } }, "socket.io-adapter": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.2.tgz", - "integrity": "sha512-87C3LO/NOMc+eMcpcxUBebGjkpMDkNBS9tf7KJqcDsmL936EChtVva71Dw2q4tQcuVC+hAUy4an2NO/sYXmwRA==", + "version": "2.5.5", + "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.5.tgz", + "integrity": "sha512-eLDQas5dzPgOWCk9GuuJC2lBqItuhKI4uxGgo9aIV7MYbk2h9Q6uULEh8WBzThoI7l+qU9Ast9fVUmkqPP9wYg==", "dev": true, "requires": { - "ws": "~8.11.0" + "debug": "~4.3.4", + "ws": "~8.17.1" + }, + "dependencies": { + "debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", + "dev": true, + "requires": { + "ms": "^2.1.3" + } + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + } } }, "socket.io-parser": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.3.tgz", - "integrity": "sha512-JMafRntWVO2DCJimKsRTh/wnqVvO4hrfwOqtO7f+uzwsQMuxO6VwImtYxaQ+ieoyshWOTJyV0fA21lccEXRPpQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.4.tgz", + "integrity": "sha512-/GbIKmo8ioc+NIWIhwdecY0ge+qVBSMdgxGygevmdHj24bsfgtCmcUUcQ5ZzcylGFHsN3k4HB4Cgkl96KVnuew==", "dev": true, "requires": { "@socket.io/component-emitter": "~3.1.0", @@ -2701,18 +2757,18 @@ }, "dependencies": { "debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "requires": { - "ms": "2.1.2" + "ms": "^2.1.3" } }, "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true } } @@ -2817,6 +2873,12 @@ "integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==", "dev": true }, + "undici-types": { + "version": "6.20.0", + "resolved": 
"https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true + }, "universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", @@ -2874,9 +2936,9 @@ "dev": true }, "ws": { - "version": "8.11.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.11.0.tgz", - "integrity": "sha512-HPG3wQd9sNQoT9xHyNCXoDUa+Xw/VevmY9FoHyQ+g+rrMn4j6FB4np7Z0OhdTgjx6MgQLK7jwSy1YecU1+4Asg==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", "dev": true, "requires": {} }, diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index 19f66245a45e2..1b34b3d302e57 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -23,11 +23,11 @@ # Onnx Runtime native library is built against NDK API 21 by default # It is possible to build from source for Android API levels below 21, but it is not guaranteed -DEFAULT_ANDROID_MIN_SDK_VER = 21 +DEFAULT_ANDROID_MIN_SDK_VER = 24 # Android API 24 is the default target API version for Android builds, based on Microsoft 1CS requirements # It is possible to build from source using API level 21 and higher as the target SDK version -DEFAULT_ANDROID_TARGET_SDK_VER = 24 +DEFAULT_ANDROID_TARGET_SDK_VER = 34 def _parse_build_settings(args): diff --git a/tools/ci_build/github/android/default_full_aar_build_settings.json b/tools/ci_build/github/android/default_full_aar_build_settings.json index b0eff75812673..1c7769c623d41 100644 --- a/tools/ci_build/github/android/default_full_aar_build_settings.json +++ b/tools/ci_build/github/android/default_full_aar_build_settings.json @@ -5,8 +5,8 @@ "x86", "x86_64" ], - "android_min_sdk_version": 21, - "android_target_sdk_version": 24, + "android_min_sdk_version": 24, + "android_target_sdk_version": 34, "build_params": [ "--enable_lto", "--android", diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml new file mode 100644 index 0000000000000..9a721c65de332 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml @@ -0,0 +1,21 @@ +parameters: +- name: EP_NAME + type: string + default: CPU + +- name: PYTHON_VERSION + type: string + +steps: +- powershell: | + python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq + Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*cp${{ replace(parameters.PYTHON_VERSION,'.','') }}*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} + mkdir -p $(Agent.TempDirectory)\ort_test_data + Copy-Item -Path $(Build.sourcesDirectory)/onnxruntime/test/python/onnx_backend_test_series.py -Destination $(Agent.TempDirectory)\ort_test_data + Copy-Item -Recurse -Path $(Build.sourcesDirectory)/onnxruntime/test/testdata -Destination $(Agent.TempDirectory)\ort_test_data + cd $(Agent.TempDirectory)\ort_test_data + python onnx_backend_test_series.py --devices ${{ parameters.EP_NAME }} -v + cd $(Agent.TempDirectory) + Remove-Item -Path $(Agent.TempDirectory)\ort_test_data -Recurse -Force + workingDirectory: '$(Build.sourcesDirectory)' + displayName: 'Run Python Tests 
with ${{ parameters.EP_NAME }} EP' \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 947e4f99b984f..f7235e3ad2076 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -56,7 +56,7 @@ stages: PYTHON_VERSION: ${{ python_version }} EP_NAME: gpu CudaVersion: ${{ parameters.cuda_version }} - EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_dml --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" use_tensorrt: True - ${{ if eq(parameters.enable_linux_cuda, true) }}: diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index aa7f2845fc0fa..dd0539f751c89 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -33,7 +33,7 @@ parameters: - Release - RelWithDebInfo - MinSizeRel - + - name: use_tensorrt type: boolean default: false @@ -134,7 +134,7 @@ stages: --cmake_generator "$(VSGenerator)" --enable_pybind --enable_onnx_tests - --parallel --use_binskim_compliant_compile_flags --update --build + --parallel 4 --use_binskim_compliant_compile_flags --update --build $(TelemetryOption) ${{ parameters.BUILD_PY_PARAMETERS }} ${{ parameters.EP_BUILD_FLAGS }} ${{ variables.trt_build_flag }} workingDirectory: '$(Build.BinariesDirectory)' @@ -206,19 +206,20 @@ stages: DownloadTRT: ${{ parameters.use_tensorrt }} - task: PowerShell@2 - displayName: 'Install ONNX' + displayName: 'Install Third Party Dependencies' inputs: filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' workingDirectory: '$(Build.BinariesDirectory)' arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\${{ parameters.cmake_build_type }}\installed -build_config ${{ parameters.cmake_build_type }} - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*cp${{ replace(parameters.PYTHON_VERSION,'.','') }}*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} - mkdir -p $(Agent.TempDirectory)\ort_test_data - Copy-Item -Path $(Build.sourcesDirectory)/onnxruntime/test/python/onnx_backend_test_series.py -Destination $(Agent.TempDirectory)\ort_test_data - Copy-Item -Recurse -Path $(Build.sourcesDirectory)/onnxruntime/test/testdata -Destination $(Agent.TempDirectory)\ort_test_data - cd $(Agent.TempDirectory)\ort_test_data - python onnx_backend_test_series.py - workingDirectory: '$(Build.sourcesDirectory)' - displayName: 'Run Python Tests' + - template: jobs/steps/py_packaging_test_step.yml + parameters: + EP_NAME: DML + PYTHON_VERSION: ${{ parameters.PYTHON_VERSION }} + + - template: jobs/steps/py_packaging_test_step.yml + parameters: + EP_NAME: CUDA + PYTHON_VERSION: ${{ parameters.PYTHON_VERSION }} + + diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 4fa36a1ff548b..949479fb8b5e4 100644 --- 
a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.200 + version: 1.0.201 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.200 + version: 1.0.201 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here. diff --git a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml index 080079388a76c..ab31e592d7d71 100644 --- a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml @@ -68,9 +68,6 @@ stages: jobs: - job: MacOS_C_API_Package_Publish pool: - ${{ if eq(parameters.DoESRP, true)}}: - vmImage: 'macOS-12' - ${{ else }}: vmImage: 'macOS-13' steps: - checkout: none diff --git a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml index d8ea1c35c89c4..29c5f6bb34d7a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml @@ -261,8 +261,6 @@ stages: publishJUnitResults: true testResultsFiles: '**/TEST-*.xml' testRunTitle: 'React Native Android Instrumented Test results' - javaHomeOption: 'path' - jdkDirectory: '$(JAVA_HOME_11_X64)' sonarQubeRunAnalysis: false spotBugsAnalysis: false displayName: Run React Native Android Instrumented Tests
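Note on the WebNN changes above: GetZeroConstant and CreateOrGetScalarConstant are folded into the single cached CreateOrGetConstant helper. The following is a minimal usage sketch from an op builder's point of view, not part of the patch itself; the epsilon value and the zero_point_shape variable are illustrative assumptions.

// Hedged sketch, assuming a webnn::ModelBuilder& named model_builder and a
// std::vector<uint32_t> zero_point_shape are already in scope.
// Scalar float constant (the old CreateOrGetScalarConstant use case):
emscripten::val epsilon_operand =
    model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 1e-5f);
// Zero-filled constant with an explicit shape (the old GetZeroConstant use case):
emscripten::val zero_point =
    model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0, zero_point_shape);
// Operands are cached under a name derived from (data type, value, shape), so
// repeated calls return the same MLOperand instead of allocating duplicates.

Because the zero-fill is handled inside CreateOrGetConstant, callers such as the QDQ builder can pass 0 for any supported data type, which is what the qdq_op_builder.cc change above relies on.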