diff --git a/cgmanifests/generated/cgmanifest.json b/cgmanifests/generated/cgmanifest.json
index df27fa5ab1b95..07dff50f9a3bd 100644
--- a/cgmanifests/generated/cgmanifest.json
+++ b/cgmanifests/generated/cgmanifest.json
@@ -346,7 +346,7 @@
       "component": {
         "type": "git",
         "git": {
-          "commitHash": "511eb80847afe6bded34ec491a38d5d78ba2d604",
+          "commitHash": "12a3b24c456cebd9fd11f23ac0164f78129b00c6",
           "repositoryUrl": "https://github.com/google/dawn.git"
         },
         "comments": "dawn"
diff --git a/cmake/CMakeLists.txt b/cmake/CMakeLists.txt
index 70ac62954ad6d..7710ab2f4cac7 100644
--- a/cmake/CMakeLists.txt
+++ b/cmake/CMakeLists.txt
@@ -148,6 +148,7 @@ option(onnxruntime_USE_XNNPACK "Build with XNNPACK support. Provides an alternat
 option(onnxruntime_USE_WEBNN "Build with WebNN support. Enable hardware acceleration in web browsers." OFF)
 option(onnxruntime_USE_WEBGPU "Build with WebGPU support. Enable WebGPU via C/C++ interface." OFF)
 option(onnxruntime_USE_EXTERNAL_DAWN "Build with treating Dawn as external dependency. Will not link Dawn at build time." OFF)
+option(onnxruntime_CUSTOM_DAWN_SRC_PATH "Path to custom Dawn src dir.")
 
 # Options related to reducing the binary size produced by the build
 # XNNPACK EP requires the internal NHWC contrib ops to be available, so this option must be OFF when onnxruntime_USE_XNNPACK is ON
diff --git a/cmake/deps.txt b/cmake/deps.txt
index 9cf92bf417fcb..21f9ee1701c46 100644
--- a/cmake/deps.txt
+++ b/cmake/deps.txt
@@ -58,5 +58,5 @@ extensions;https://github.com/microsoft/onnxruntime-extensions/archive/94142d839
 composable_kernel;https://github.com/ROCmSoftwarePlatform/composable_kernel/archive/204da9c522cebec5220bba52cd3542ebcaf99e7a.zip;1827348efd47831c13074245274d41b7cae8a557
 directx_headers;https://github.com/microsoft/DirectX-Headers/archive/refs/tags/v1.613.1.zip;47653509a3371eabb156360f42faf582f314bf2e
 cudnn_frontend;https://github.com/NVIDIA/cudnn-frontend/archive/refs/tags/v1.7.0.zip;d0753d8d5b39947ca0729d7773cb84653a129eb1
-dawn;https://github.com/google/dawn/archive/511eb80847afe6bded34ec491a38d5d78ba2d604.zip;c493f5aca5586f6634e25d0121c85df71189fb99
+dawn;https://github.com/google/dawn/archive/12a3b24c456cebd9fd11f23ac0164f78129b00c6.zip;ad428f6dc16f1336d584f7bad5714e1097dafc43
 kleidiai;https://gitlab.arm.com/kleidi/kleidiai/-/archive/v0.2.0/kleidiai-v0.2.0.zip;B1E3173992FD91F20DB904AB77D6E901778C2681
diff --git a/cmake/external/onnxruntime_external_deps.cmake b/cmake/external/onnxruntime_external_deps.cmake
index d9e833a2d8cd4..ee7abcbad025c 100644
--- a/cmake/external/onnxruntime_external_deps.cmake
+++ b/cmake/external/onnxruntime_external_deps.cmake
@@ -615,12 +615,25 @@ if (onnxruntime_USE_COREML)
 endif()
 
 if (onnxruntime_USE_WEBGPU)
-  FetchContent_Declare(
-    dawn
-    URL ${DEP_URL_dawn}
-    URL_HASH SHA1=${DEP_SHA1_dawn}
-    PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch
-  )
+  if (onnxruntime_CUSTOM_DAWN_SRC_PATH)
+    # use the custom dawn source path if provided
+    #
+    # specified as:
+    # build.py --use_webgpu --cmake_extra_defines "onnxruntime_CUSTOM_DAWN_SRC_PATH=<dawn source dir>"
+    FetchContent_Declare(
+      dawn
+      SOURCE_DIR ${onnxruntime_CUSTOM_DAWN_SRC_PATH}
+    )
+  else()
+    FetchContent_Declare(
+      dawn
+      URL ${DEP_URL_dawn}
+      URL_HASH SHA1=${DEP_SHA1_dawn}
+      # All previous patches are merged into the upstream dawn project. We don't need to apply any patches right now.
+      # If we need to apply patches in the future, we can uncomment the following line.
+ # PATCH_COMMAND ${Patch_EXECUTABLE} --binary --ignore-whitespace -p1 < ${PROJECT_SOURCE_DIR}/patches/dawn/dawn.patch + ) + endif() # use dawn::dawn_native and dawn::dawn_proc instead of the monolithic dawn::webgpu_dawn to minimize binary size set(DAWN_BUILD_MONOLITHIC_LIBRARY OFF CACHE BOOL "" FORCE) diff --git a/cmake/patches/dawn/dawn.patch b/cmake/patches/dawn/dawn.patch deleted file mode 100644 index 7a2a01d55be46..0000000000000 --- a/cmake/patches/dawn/dawn.patch +++ /dev/null @@ -1,81 +0,0 @@ -diff --git a/src/dawn/native/CMakeLists.txt b/src/dawn/native/CMakeLists.txt -index 9c0bd6fa4e..bf8a57aeac 100644 ---- a/src/dawn/native/CMakeLists.txt -+++ b/src/dawn/native/CMakeLists.txt -@@ -857,6 +857,11 @@ if (DAWN_ENABLE_SWIFTSHADER) - target_compile_definitions(dawn_native PRIVATE "DAWN_ENABLE_SWIFTSHADER") - endif() - -+if (IOS) -+ target_compile_options(dawn_native_objects PRIVATE -fno-objc-arc) -+ target_compile_options(dawn_native PRIVATE -fno-objc-arc) -+endif() -+ - if (DAWN_BUILD_MONOLITHIC_LIBRARY) - ############################################################################### - # Do the 'complete_lib' build. -diff --git a/src/dawn/native/Surface_metal.mm b/src/dawn/native/Surface_metal.mm -index ce55acbd43..2cfd363479 100644 ---- a/src/dawn/native/Surface_metal.mm -+++ b/src/dawn/native/Surface_metal.mm -@@ -33,10 +33,18 @@ - - #import - -+#include "dawn/common/Platform.h" -+ - namespace dawn::native { - - bool InheritsFromCAMetalLayer(void* obj) { -- id object = static_cast(obj); -+ id object = -+#if DAWN_PLATFORM_IS(IOS) -+ (__bridge id)obj; -+#else // DAWN_PLATFORM_IS(IOS) -+ static_cast(obj); -+#endif // DAWN_PLATFORM_IS(IOS) -+ - return [object isKindOfClass:[CAMetalLayer class]]; - } - -diff --git a/src/dawn/native/metal/SharedFenceMTL.mm b/src/dawn/native/metal/SharedFenceMTL.mm -index bde8bfea07..8906185d6f 100644 ---- a/src/dawn/native/metal/SharedFenceMTL.mm -+++ b/src/dawn/native/metal/SharedFenceMTL.mm -@@ -25,6 +25,8 @@ - // OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE - // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
- -+#include "dawn/common/Platform.h" -+ - #include "dawn/native/metal/SharedFenceMTL.h" - - #include "dawn/native/ChainUtils.h" -@@ -39,8 +41,13 @@ ResultOrError> SharedFence::Create( - const SharedFenceMTLSharedEventDescriptor* descriptor) { - DAWN_INVALID_IF(descriptor->sharedEvent == nullptr, "MTLSharedEvent is missing."); - if (@available(macOS 10.14, iOS 12.0, *)) { -- return AcquireRef(new SharedFence( -- device, label, static_cast>(descriptor->sharedEvent))); -+ return AcquireRef(new SharedFence(device, label, -+#if DAWN_PLATFORM_IS(IOS) -+ (__bridge id)(descriptor->sharedEvent) -+#else // DAWN_PLATFORM_IS(IOS) -+ static_cast>(descriptor->sharedEvent) -+#endif // DAWN_PLATFORM_IS(IOS) -+ )); - } else { - return DAWN_INTERNAL_ERROR("MTLSharedEvent not supported."); - } -diff --git a/src/tint/api/BUILD.cmake b/src/tint/api/BUILD.cmake -index 0037d83276..6372c4ee77 100644 ---- a/src/tint/api/BUILD.cmake -+++ b/src/tint/api/BUILD.cmake -@@ -57,6 +57,7 @@ tint_target_add_dependencies(tint_api lib - tint_lang_wgsl_ast_transform - tint_lang_wgsl_common - tint_lang_wgsl_features -+ tint_lang_wgsl_inspector - tint_lang_wgsl_program - tint_lang_wgsl_sem - tint_lang_wgsl_writer_ir_to_program diff --git a/dockerfiles/Dockerfile.cuda b/dockerfiles/Dockerfile.cuda index ce4560e9b0c7c..40f11dca623a7 100644 --- a/dockerfiles/Dockerfile.cuda +++ b/dockerfiles/Dockerfile.cuda @@ -48,7 +48,7 @@ RUN cd /code \ && python3 -m venv /code/env \ && . /code/env/bin/activate \ && pip install --upgrade psutil setuptools wheel packaging \ - && pip install -r tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/scripts/requirements.txt \ + && pip install -r /code/tools/ci_build/github/linux/python/requirements.txt \ && python /code/tools/ci_build/build.py --build_dir /code/build/Linux \ --allow_running_as_root --skip_submodule_sync \ --use_cuda --cuda_home /usr/local/cuda \ diff --git a/docs/How_To_Update_ONNX_Dev_Notes.md b/docs/How_To_Update_ONNX_Dev_Notes.md index 4d8a286bde66e..199e6671f6a1a 100644 --- a/docs/How_To_Update_ONNX_Dev_Notes.md +++ b/docs/How_To_Update_ONNX_Dev_Notes.md @@ -21,7 +21,7 @@ This file should be generated. See [cgmanifests/README](/cgmanifests/README.md) - [onnxruntime/test/python/requirements.txt](/onnxruntime/test/python/requirements.txt) - [tools/ci_build/github/linux/docker/scripts/requirements.txt](/tools/ci_build/github/linux/docker/scripts/requirements.txt) - [tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt](/tools/ci_build/github/linux/docker/scripts/manylinux/requirements.txt) -- [tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/scripts/requirements.txt](/tools/ci_build/github/linux/docker/inference/x86_64/python/cpu/scripts/requirements.txt) +- [tools/ci_build/github/linux/python/requirements.txt](/tools/ci_build/github/linux/python/requirements.txt) - Run `git grep -rn "onnx==1" .` to find other locations and update this document if necessary. 1. If there is any change to `cmake/external/onnx/onnx/*.in.proto`, you need to regenerate OnnxMl.cs. 
diff --git a/java/build-android.gradle b/java/build-android.gradle index d5839f9f27869..9c4275b74f626 100644 --- a/java/build-android.gradle +++ b/java/build-android.gradle @@ -82,7 +82,7 @@ allprojects { } android { - compileSdkVersion 32 + compileSdkVersion 34 defaultConfig { minSdkVersion minSdkVer @@ -108,8 +108,8 @@ android { } compileOptions { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } sourceSets { diff --git a/java/build.gradle b/java/build.gradle index 34ac93cce6f4e..845121dd17a48 100644 --- a/java/build.gradle +++ b/java/build.gradle @@ -50,8 +50,8 @@ mavenSettings { } java { - sourceCompatibility = JavaVersion.VERSION_1_8 - targetCompatibility = JavaVersion.VERSION_1_8 + sourceCompatibility = JavaVersion.VERSION_17 + targetCompatibility = JavaVersion.VERSION_17 } // This jar tasks serves as a CMAKE signaling diff --git a/java/gradle/wrapper/gradle-wrapper.properties b/java/gradle/wrapper/gradle-wrapper.properties index 4baf5a11d45a3..381baa9cef1ec 100644 --- a/java/gradle/wrapper/gradle-wrapper.properties +++ b/java/gradle/wrapper/gradle-wrapper.properties @@ -1,7 +1,7 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=9631d53cf3e74bfa726893aee1f8994fee4e060c401335946dba2156f440f24c -distributionUrl=https\://services.gradle.org/distributions/gradle-8.6-bin.zip +distributionSha256Sum=544c35d6bd849ae8a5ed0bcea39ba677dc40f49df7d1835561582da2009b961d +distributionUrl=https\://services.gradle.org/distributions/gradle-8.7-bin.zip networkTimeout=10000 validateDistributionUrl=true zipStoreBase=GRADLE_USER_HOME diff --git a/java/gradlew.bat b/java/gradlew.bat index 93e3f59f135dd..25da30dbdeee9 100644 --- a/java/gradlew.bat +++ b/java/gradlew.bat @@ -43,11 +43,11 @@ set JAVA_EXE=java.exe %JAVA_EXE% -version >NUL 2>&1 if %ERRORLEVEL% equ 0 goto execute -echo. -echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 1>&2 goto fail @@ -57,11 +57,11 @@ set JAVA_EXE=%JAVA_HOME%/bin/java.exe if exist "%JAVA_EXE%" goto execute -echo. -echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% -echo. -echo Please set the JAVA_HOME variable in your environment to match the -echo location of your Java installation. +echo. 1>&2 +echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% 1>&2 +echo. 1>&2 +echo Please set the JAVA_HOME variable in your environment to match the 1>&2 +echo location of your Java installation. 
1>&2 goto fail diff --git a/java/src/test/android/app/build.gradle b/java/src/test/android/app/build.gradle index ecbc4b90612dd..baf18e714d25c 100644 --- a/java/src/test/android/app/build.gradle +++ b/java/src/test/android/app/build.gradle @@ -7,12 +7,12 @@ def minSdkVer = System.properties.get("minSdkVer")?:24 def qnnVersion = System.properties['qnnVersion'] android { - compileSdkVersion 32 + compileSdkVersion 34 defaultConfig { applicationId "ai.onnxruntime.example.javavalidator" minSdkVersion minSdkVer - targetSdkVersion 32 + targetSdkVersion 34 versionCode 1 versionName "1.0" @@ -34,11 +34,11 @@ android { } } compileOptions { - sourceCompatibility JavaVersion.VERSION_1_8 - targetCompatibility JavaVersion.VERSION_1_8 + sourceCompatibility JavaVersion.VERSION_17 + targetCompatibility JavaVersion.VERSION_17 } kotlinOptions { - jvmTarget = '1.8' + jvmTarget = '17' } // Conditional packagingOptions for QNN builds only if (qnnVersion != null) { @@ -69,11 +69,11 @@ dependencies { implementation 'com.google.android.material:material:1.3.0' implementation 'androidx.constraintlayout:constraintlayout:2.0.4' testImplementation 'junit:junit:4.+' - androidTestImplementation 'androidx.test.ext:junit:1.1.3' - androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0' + androidTestImplementation "androidx.test.ext:junit:1.1.5" + androidTestImplementation "androidx.test.espresso:espresso-core:3.5.0" - androidTestImplementation 'androidx.test:runner:1.4.0' - androidTestImplementation 'androidx.test:rules:1.4.0' + androidTestImplementation "androidx.test:runner:1.5.2" + androidTestImplementation "androidx.test:rules:1.5.0" androidTestImplementation 'com.microsoft.appcenter:espresso-test-extension:1.4' // dependencies for onnxruntime-android-qnn diff --git a/js/.eslintrc.js b/js/.eslintrc.js index bd1e9061355f5..462e417df1d66 100644 --- a/js/.eslintrc.js +++ b/js/.eslintrc.js @@ -198,19 +198,6 @@ module.exports = { '_OrtReleaseTensor', '_OrtRun', '_OrtRunWithBinding', - '_OrtTrainingCopyParametersFromBuffer', - '_OrtTrainingCopyParametersToBuffer', - '_OrtTrainingCreateSession', - '_OrtTrainingEvalStep', - '_OrtTrainingGetModelInputOutputCount', - '_OrtTrainingGetModelInputOutputName', - '_OrtTrainingGetParametersSize', - '_OrtTrainingLazyResetGrad', - '_OrtTrainingLoadCheckpoint', - '_OrtTrainingOptimizerStep', - '_OrtTrainingReleaseCheckpoint', - '_OrtTrainingReleaseSession', - '_OrtTrainingRunTrainStep', ], }, ], diff --git a/js/common/lib/backend.ts b/js/common/lib/backend.ts index e27e67622aa82..e63f9c6c9147f 100644 --- a/js/common/lib/backend.ts +++ b/js/common/lib/backend.ts @@ -3,7 +3,6 @@ import { InferenceSession } from './inference-session.js'; import { OnnxValue } from './onnx-value.js'; -import { TrainingSession } from './training-session.js'; /** * @ignore @@ -42,33 +41,6 @@ export interface InferenceSessionHandler extends SessionHandler { ): Promise; } -/** - * Represent a handler instance of a training inference session. 
- * - * @ignore - */ -export interface TrainingSessionHandler extends SessionHandler { - readonly evalInputNames: readonly string[]; - readonly evalOutputNames: readonly string[]; - - lazyResetGrad(): Promise; - runTrainStep( - feeds: SessionHandler.FeedsType, - fetches: SessionHandler.FetchesType, - options: InferenceSession.RunOptions, - ): Promise; - runOptimizerStep(options: InferenceSession.RunOptions): Promise; - runEvalStep( - feeds: SessionHandler.FeedsType, - fetches: SessionHandler.FetchesType, - options: InferenceSession.RunOptions, - ): Promise; - - getParametersSize(trainableOnly: boolean): Promise; - loadParametersBuffer(buffer: Uint8Array, trainableOnly: boolean): Promise; - getContiguousParameters(trainableOnly: boolean): Promise; -} - /** * Represent a backend that provides implementation of model inferencing. * @@ -84,14 +56,6 @@ export interface Backend { uriOrBuffer: string | Uint8Array, options?: InferenceSession.SessionOptions, ): Promise; - - createTrainingSessionHandler?( - checkpointStateUriOrBuffer: TrainingSession.UriOrBuffer, - trainModelUriOrBuffer: TrainingSession.UriOrBuffer, - evalModelUriOrBuffer: TrainingSession.UriOrBuffer, - optimizerModelUriOrBuffer: TrainingSession.UriOrBuffer, - options: InferenceSession.SessionOptions, - ): Promise; } export { registerBackend } from './backend-impl.js'; diff --git a/js/common/lib/env.ts b/js/common/lib/env.ts index 642a897a90d26..e70f608ad7030 100644 --- a/js/common/lib/env.ts +++ b/js/common/lib/env.ts @@ -2,6 +2,7 @@ // Licensed under the MIT License. import { env as envImpl } from './env-impl.js'; +import { TryGetGlobalType } from './type-helper.js'; export declare namespace Env { export type WasmPathPrefix = string; @@ -14,7 +15,6 @@ export declare namespace Env { * If not modified, the filename of the .wasm file is: * - `ort-wasm-simd-threaded.wasm` for default build * - `ort-wasm-simd-threaded.jsep.wasm` for JSEP build (with WebGPU and WebNN) - * - `ort-training-wasm-simd-threaded.wasm` for training build */ wasm?: URL | string; /** @@ -25,7 +25,6 @@ export declare namespace Env { * If not modified, the filename of the .mjs file is: * - `ort-wasm-simd-threaded.mjs` for default build * - `ort-wasm-simd-threaded.jsep.mjs` for JSEP build (with WebGPU and WebNN) - * - `ort-training-wasm-simd-threaded.mjs` for training build */ mjs?: URL | string; } @@ -200,22 +199,16 @@ export declare namespace Env { * value will be the GPU adapter that created by the underlying WebGPU backend. * * When use with TypeScript, the type of this property is `GPUAdapter` defined in "@webgpu/types". - * Use `const adapter = env.webgpu.adapter as GPUAdapter;` in TypeScript to access this property with correct type. - * - * see comments on {@link Tensor.GpuBufferType} */ - adapter: unknown; + adapter: TryGetGlobalType<'GPUAdapter'>; /** * Get the device for WebGPU. * * This property is only available after the first WebGPU inference session is created. * * When use with TypeScript, the type of this property is `GPUDevice` defined in "@webgpu/types". - * Use `const device = env.webgpu.device as GPUDevice;` in TypeScript to access this property with correct type. - * - * see comments on {@link Tensor.GpuBufferType} for more details about why not use types defined in "@webgpu/types". */ - readonly device: unknown; + readonly device: TryGetGlobalType<'GPUDevice'>; /** * Set or get whether validate input content. 
diff --git a/js/common/lib/env.ts b/js/common/lib/env.ts
index 642a897a90d26..e70f608ad7030 100644
--- a/js/common/lib/env.ts
+++ b/js/common/lib/env.ts
@@ -2,6 +2,7 @@
 // Licensed under the MIT License.
 
 import { env as envImpl } from './env-impl.js';
+import { TryGetGlobalType } from './type-helper.js';
 
 export declare namespace Env {
   export type WasmPathPrefix = string;
@@ -14,7 +15,6 @@ export declare namespace Env {
      * If not modified, the filename of the .wasm file is:
      * - `ort-wasm-simd-threaded.wasm` for default build
      * - `ort-wasm-simd-threaded.jsep.wasm` for JSEP build (with WebGPU and WebNN)
-     * - `ort-training-wasm-simd-threaded.wasm` for training build
      */
     wasm?: URL | string;
     /**
@@ -25,7 +25,6 @@
      * If not modified, the filename of the .mjs file is:
      * - `ort-wasm-simd-threaded.mjs` for default build
      * - `ort-wasm-simd-threaded.jsep.mjs` for JSEP build (with WebGPU and WebNN)
-     * - `ort-training-wasm-simd-threaded.mjs` for training build
      */
     mjs?: URL | string;
   }
@@ -200,22 +199,16 @@
      * value will be the GPU adapter that created by the underlying WebGPU backend.
      *
      * When use with TypeScript, the type of this property is `GPUAdapter` defined in "@webgpu/types".
-     * Use `const adapter = env.webgpu.adapter as GPUAdapter;` in TypeScript to access this property with correct type.
-     *
-     * see comments on {@link Tensor.GpuBufferType}
      */
-    adapter: unknown;
+    adapter: TryGetGlobalType<'GPUAdapter'>;
     /**
      * Get the device for WebGPU.
      *
      * This property is only available after the first WebGPU inference session is created.
      *
      * When use with TypeScript, the type of this property is `GPUDevice` defined in "@webgpu/types".
-     * Use `const device = env.webgpu.device as GPUDevice;` in TypeScript to access this property with correct type.
-     *
-     * see comments on {@link Tensor.GpuBufferType} for more details about why not use types defined in "@webgpu/types".
      */
-    readonly device: unknown;
+    readonly device: TryGetGlobalType<'GPUDevice'>;
     /**
      * Set or get whether validate input content.
      *
diff --git a/js/common/lib/index.ts b/js/common/lib/index.ts
index 3ed56b3c2e812..d75e6a477258d 100644
--- a/js/common/lib/index.ts
+++ b/js/common/lib/index.ts
@@ -26,4 +26,3 @@ export * from './tensor-factory.js';
 export * from './trace.js';
 export * from './onnx-model.js';
 export * from './onnx-value.js';
-export * from './training-session.js';
diff --git a/js/common/lib/inference-session.ts b/js/common/lib/inference-session.ts
index 547db029471a2..e62c6579e8333 100644
--- a/js/common/lib/inference-session.ts
+++ b/js/common/lib/inference-session.ts
@@ -4,6 +4,7 @@
 import { InferenceSession as InferenceSessionImpl } from './inference-session-impl.js';
 import { OnnxModelOptions } from './onnx-model.js';
 import { OnnxValue, OnnxValueDataLocation } from './onnx-value.js';
+import { TryGetGlobalType } from './type-helper.js';
 
 /* eslint-disable @typescript-eslint/no-redeclare */
 
@@ -282,7 +283,7 @@ export declare namespace InferenceSession {
     extends WebNNExecutionProviderName,
       Omit<WebNNContextOptions, 'deviceType'>,
       Required<Pick<WebNNContextOptions, 'deviceType'>> {
-    context: unknown /* MLContext */;
+    context: TryGetGlobalType<'MLContext'>;
   }
 
   /**
@@ -291,8 +292,8 @@
    * @see https://www.w3.org/TR/webnn/#dom-ml-createcontext-gpudevice
    */
   export interface WebNNOptionsWebGpu extends WebNNExecutionProviderName {
-    context: unknown /* MLContext */;
-    gpuDevice: unknown /* GPUDevice */;
+    context: TryGetGlobalType<'MLContext'>;
+    gpuDevice: TryGetGlobalType<'GPUDevice'>;
   }
 
   /**
diff --git a/js/common/lib/tensor.ts b/js/common/lib/tensor.ts
index af918705b97e3..05553bd96662b 100644
--- a/js/common/lib/tensor.ts
+++ b/js/common/lib/tensor.ts
@@ -4,6 +4,7 @@
 import { TensorFactory } from './tensor-factory.js';
 import { Tensor as TensorImpl } from './tensor-impl.js';
 import { TypedTensorUtils } from './tensor-utils.js';
+import { TryGetGlobalType } from './type-helper.js';
 
 /* eslint-disable @typescript-eslint/no-redeclare */
 
@@ -131,24 +132,19 @@
    */
   export type TextureDataTypes = 'float32';
 
+  type GpuBufferTypeFallback = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };
   /**
    * type alias for WebGPU buffer
-   *
-   * The reason why we don't use type "GPUBuffer" defined in webgpu.d.ts from @webgpu/types is because "@webgpu/types"
-   * requires "@types/dom-webcodecs" as peer dependency when using TypeScript < v5.1 and its version need to be chosen
-   * carefully according to the TypeScript version being used. This means so far there is not a way to keep every
-   * TypeScript version happy. It turns out that we will easily broke users on some TypeScript version.
-   *
-   * for more info see https://github.com/gpuweb/types/issues/127
    */
-  export type GpuBufferType = { size: number; mapState: 'unmapped' | 'pending' | 'mapped' };
+  export type GpuBufferType = TryGetGlobalType<'GPUBuffer', GpuBufferTypeFallback>;
 
+  type MLTensorTypeFallback = { destroy(): void };
   /**
    * type alias for WebNN MLTensor
    *
    * The specification for WebNN's MLTensor is currently in flux.
    */
-  export type MLTensorType = unknown;
+  export type MLTensorType = TryGetGlobalType<'MLTensor', MLTensorTypeFallback>;
 
   /**
    * supported data types for constructing a tensor from a WebGPU buffer
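What the switch from `unknown` to `TryGetGlobalType<...>` buys consumers: the sketch below assumes "@webgpu/types" is installed as a dev dependency, in which case `env.webgpu.device` and `Tensor.GpuBufferType` resolve to `GPUDevice`/`GPUBuffer` with no casts; without it, the structural fallbacks still compile.

```typescript
import { env, Tensor } from 'onnxruntime-common';

// With "@webgpu/types" installed, this is a GPUDevice -- no `as GPUDevice` cast needed.
// (The property is only populated after the first WebGPU inference session is created.)
const device = env.webgpu.device;
console.log(device);

// Tensor.GpuBufferType resolves to GPUBuffer when the types package is present,
// otherwise to the structural fallback { size; mapState }. Either way this compiles:
function gpuBufferInfo(tensor: Tensor): { size: number; mapped: boolean } {
  const buffer = tensor.gpuBuffer; // throws if the tensor data is not held in a GPU buffer
  return { size: buffer.size, mapped: buffer.mapState === 'mapped' };
}
```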
diff --git a/js/common/lib/training-session-impl.ts b/js/common/lib/training-session-impl.ts
deleted file mode 100644
index 21dbe5fe51bb9..0000000000000
--- a/js/common/lib/training-session-impl.ts
+++ /dev/null
@@ -1,273 +0,0 @@
-// Copyright (c) Microsoft Corporation. All rights reserved.
-// Licensed under the MIT License.
-
-import { resolveBackendAndExecutionProviders } from './backend-impl.js';
-import { SessionHandler, TrainingSessionHandler } from './backend.js';
-import { InferenceSession as InferenceSession } from './inference-session.js';
-import { OnnxValue } from './onnx-value.js';
-import { Tensor } from './tensor.js';
-import { TrainingSession as TrainingSessionInterface, TrainingSessionCreateOptions } from './training-session.js';
-
-type SessionOptions = InferenceSession.SessionOptions;
-type FeedsType = InferenceSession.FeedsType;
-type FetchesType = InferenceSession.FetchesType;
-type ReturnType = InferenceSession.ReturnType;
-type RunOptions = InferenceSession.RunOptions;
-
-const noBackendErrMsg: string =
-  'Training backend could not be resolved. ' + "Make sure you're using the correct configuration & WebAssembly files.";
-
-export class TrainingSession implements TrainingSessionInterface {
-  private constructor(handler: TrainingSessionHandler, hasOptimizerModel: boolean, hasEvalModel: boolean) {
-    this.handler = handler;
-    this.hasOptimizerModel = hasOptimizerModel;
-    this.hasEvalModel = hasEvalModel;
-  }
-  private handler: TrainingSessionHandler;
-  private hasOptimizerModel: boolean;
-  private hasEvalModel: boolean;
-
-  get trainingInputNames(): readonly string[] {
-    return this.handler.inputNames;
-  }
-  get trainingOutputNames(): readonly string[] {
-    return this.handler.outputNames;
-  }
-
-  get evalInputNames(): readonly string[] {
-    if (this.hasEvalModel) {
-      return this.handler.evalInputNames;
-    } else {
-      throw new Error('This training session has no evalModel loaded.');
-    }
-  }
-  get evalOutputNames(): readonly string[] {
-    if (this.hasEvalModel) {
-      return this.handler.evalOutputNames;
-    } else {
-      throw new Error('This training session has no evalModel loaded.');
-    }
-  }
-
-  static async create(
-    trainingOptions: TrainingSessionCreateOptions,
-    sessionOptions?: SessionOptions,
-  ): Promise<TrainingSession> {
-    const evalModel: string | Uint8Array = trainingOptions.evalModel || '';
-    const optimizerModel: string | Uint8Array = trainingOptions.optimizerModel || '';
-    const options: SessionOptions = sessionOptions || {};
-
-    // resolve backend, update session options with validated EPs, and create session handler
-    const [backend, optionsWithValidatedEPs] = await resolveBackendAndExecutionProviders(options);
-    if (backend.createTrainingSessionHandler) {
-      const handler = await backend.createTrainingSessionHandler(
-        trainingOptions.checkpointState,
-        trainingOptions.trainModel,
-        evalModel,
-        optimizerModel,
-        optionsWithValidatedEPs,
-      );
-      return new TrainingSession(handler, !!trainingOptions.optimizerModel, !!trainingOptions.evalModel);
-    } else {
-      throw new Error(noBackendErrMsg);
-    }
-  }
-
-  /**
-   * Helper function for runTrainStep and future runStep methods that handles the type-narrowing conversion from
-   * the given parameters to SessionHandler.FetchesType and RunOptions.
-   *
-   * @param inputNames the feeds object is checked that they contain all input names in the provided list of input
-   * names.
-   * @param outputNames the fetches object is checked that their keys match up with valid names in the list of output
-   * names.
-   * @param feeds the required input
-   * @param arg1 narrowed & converted into the SessionHandler.FetchesType or RunOptions object
-   * @param arg2 optional RunOptions object.
- * @returns - */ - typeNarrowingForRunStep( - inputNames: readonly string[], - outputNames: readonly string[], - feeds: FeedsType, - arg1?: FetchesType | RunOptions, - arg2?: RunOptions, - ): [SessionHandler.FetchesType, RunOptions] { - const fetches: { [name: string]: OnnxValue | null } = {}; - let options: RunOptions = {}; - // check inputs - if (typeof feeds !== 'object' || feeds === null || feeds instanceof Tensor || Array.isArray(feeds)) { - throw new TypeError( - "'feeds' must be an object that use input names as keys and OnnxValue as corresponding values.", - ); - } - - let isFetchesEmpty = true; - // determine which override is being used - if (typeof arg1 === 'object') { - if (arg1 === null) { - throw new TypeError('Unexpected argument[1]: cannot be null.'); - } - if (arg1 instanceof Tensor) { - throw new TypeError("'fetches' cannot be a Tensor"); - } - - if (Array.isArray(arg1)) { - if (arg1.length === 0) { - throw new TypeError("'fetches' cannot be an empty array."); - } - isFetchesEmpty = false; - // output names - for (const name of arg1) { - if (typeof name !== 'string') { - throw new TypeError("'fetches' must be a string array or an object."); - } - if (outputNames.indexOf(name) === -1) { - throw new RangeError(`'fetches' contains invalid output name: ${name}.`); - } - fetches[name] = null; - } - - if (typeof arg2 === 'object' && arg2 !== null) { - options = arg2; - } else if (typeof arg2 !== 'undefined') { - throw new TypeError("'options' must be an object."); - } - } else { - // decide whether arg1 is fetches or options - // if any output name is present and its value is valid OnnxValue, we consider it fetches - let isFetches = false; - const arg1Keys = Object.getOwnPropertyNames(arg1); - for (const name of outputNames) { - if (arg1Keys.indexOf(name) !== -1) { - const v = (arg1 as InferenceSession.NullableOnnxValueMapType)[name]; - if (v === null || v instanceof Tensor) { - isFetches = true; - isFetchesEmpty = false; - fetches[name] = v; - } - } - } - - if (isFetches) { - if (typeof arg2 === 'object' && arg2 !== null) { - options = arg2; - } else if (typeof arg2 !== 'undefined') { - throw new TypeError("'options' must be an object."); - } - } else { - options = arg1 as RunOptions; - } - } - } else if (typeof arg1 !== 'undefined') { - throw new TypeError("Unexpected argument[1]: must be 'fetches' or 'options'."); - } - - // check if all inputs are in feed - for (const name of inputNames) { - if (typeof feeds[name] === 'undefined') { - throw new Error(`input '${name}' is missing in 'feeds'.`); - } - } - - // if no fetches is specified, we use the full output names list - if (isFetchesEmpty) { - for (const name of outputNames) { - fetches[name] = null; - } - } - - return [fetches, options]; - } - - /** - * Helper method for runTrainStep and any other runStep methods. Takes the ReturnType result from the SessionHandler - * and changes it into a map of Tensors. 
- * - * @param results - * @returns - */ - convertHandlerReturnTypeToMapOfTensors(results: SessionHandler.ReturnType): ReturnType { - const returnValue: { [name: string]: OnnxValue } = {}; - for (const key in results) { - if (Object.hasOwnProperty.call(results, key)) { - const result = results[key]; - if (result instanceof Tensor) { - returnValue[key] = result; - } else { - returnValue[key] = new Tensor(result.type, result.data, result.dims); - } - } - } - return returnValue; - } - - async lazyResetGrad(): Promise { - await this.handler.lazyResetGrad(); - } - - runTrainStep(feeds: FeedsType, options?: RunOptions): Promise; - runTrainStep(feeds: FeedsType, fetches: FetchesType, options?: RunOptions): Promise; - async runTrainStep(feeds: FeedsType, arg1?: FetchesType | RunOptions, arg2?: RunOptions): Promise { - const [fetches, options] = this.typeNarrowingForRunStep( - this.trainingInputNames, - this.trainingOutputNames, - feeds, - arg1, - arg2, - ); - const results = await this.handler.runTrainStep(feeds, fetches, options); - return this.convertHandlerReturnTypeToMapOfTensors(results); - } - - async runOptimizerStep(options?: InferenceSession.RunOptions | undefined): Promise { - if (this.hasOptimizerModel) { - await this.handler.runOptimizerStep(options || {}); - } else { - throw new Error('This TrainingSession has no OptimizerModel loaded.'); - } - } - - runEvalStep(feeds: FeedsType, options?: RunOptions | undefined): Promise; - runEvalStep(feeds: FeedsType, fetches: FetchesType, options?: RunOptions | undefined): Promise; - async runEvalStep(feeds: FeedsType, arg1?: FetchesType | RunOptions, arg2?: RunOptions): Promise { - if (this.hasEvalModel) { - const [fetches, options] = this.typeNarrowingForRunStep( - this.evalInputNames, - this.evalOutputNames, - feeds, - arg1, - arg2, - ); - const results = await this.handler.runEvalStep(feeds, fetches, options); - return this.convertHandlerReturnTypeToMapOfTensors(results); - } else { - throw new Error('This TrainingSession has no EvalModel loaded.'); - } - } - - async getParametersSize(trainableOnly = true): Promise { - return this.handler.getParametersSize(trainableOnly); - } - - async loadParametersBuffer(array: Uint8Array, trainableOnly = true): Promise { - const paramsSize = await this.getParametersSize(trainableOnly); - // checking that the size of the Uint8Array is equivalent to the byte length of a Float32Array of the number - // of parameters - if (array.length !== 4 * paramsSize) { - throw new Error( - 'Size of the buffer passed into loadParametersBuffer must match the number of parameters in ' + - 'the model. Please use getParametersSize method to check.', - ); - } - return this.handler.loadParametersBuffer(array, trainableOnly); - } - - async getContiguousParameters(trainableOnly = true): Promise { - return this.handler.getContiguousParameters(trainableOnly); - } - - async release(): Promise { - return this.handler.dispose(); - } -} diff --git a/js/common/lib/training-session.ts b/js/common/lib/training-session.ts deleted file mode 100644 index 45dcafc46deb5..0000000000000 --- a/js/common/lib/training-session.ts +++ /dev/null @@ -1,206 +0,0 @@ -// Copyright (c) Microsoft Corporation. All rights reserved. -// Licensed under the MIT License. 
- -import { InferenceSession } from './inference-session.js'; -import { OnnxValue } from './onnx-value.js'; -import { TrainingSession as TrainingSessionImpl } from './training-session-impl.js'; - -/* eslint-disable @typescript-eslint/no-redeclare */ - -export declare namespace TrainingSession { - /** - * Either URI file path (string) or Uint8Array containing model or checkpoint information. - */ - type UriOrBuffer = string | Uint8Array; -} - -/** - * Represent a runtime instance of an ONNX training session, - * which contains a model that can be trained, and, optionally, - * an eval and optimizer model. - */ -export interface TrainingSession { - // #region run() - - /** - * Lazily resets the gradients of all trainable parameters to zero. Should happen after the invocation of - * runOptimizerStep. - */ - lazyResetGrad(): Promise; - - /** - * Run TrainStep asynchronously with the given feeds and options. - * - * @param feeds - Representation of the model input. See type description of `InferenceSession.InputType` for - detail. - * @param options - Optional. A set of options that controls the behavior of model training. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding values. - */ - runTrainStep( - feeds: InferenceSession.FeedsType, - options?: InferenceSession.RunOptions, - ): Promise; - - /** - * Run a single train step with the given inputs and options. - * - * @param feeds - Representation of the model input. - * @param fetches - Representation of the model output. - * detail. - * @param options - Optional. A set of options that controls the behavior of model training. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding - values. - */ - runTrainStep( - feeds: InferenceSession.FeedsType, - fetches: InferenceSession.FetchesType, - options?: InferenceSession.RunOptions, - ): Promise; - - /** - * Runs a single optimizer step, which performs weight updates for the trainable parameters using the optimizer model. - * - * @param options - Optional. A set of options that controls the behavior of model optimizing. - */ - runOptimizerStep(options?: InferenceSession.RunOptions): Promise; - - /** - * Run a single eval step with the given inputs and options using the eval model. - * - * @param feeds - Representation of the model input. - * @param options - Optional. A set of options that controls the behavior of model eval step. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding - values. - */ - runEvalStep( - feeds: InferenceSession.FeedsType, - options?: InferenceSession.RunOptions, - ): Promise; - - /** - * Run a single eval step with the given inputs and options using the eval model. - * - * @param feeds - Representation of the model input. - * @param fetches - Representation of the model output. - * detail. - * @param options - Optional. A set of options that controls the behavior of model eval step. - * @returns A promise that resolves to a map, which uses output names as keys and OnnxValue as corresponding - values. - */ - runEvalStep( - feeds: InferenceSession.FeedsType, - fetches: InferenceSession.FetchesType, - options?: InferenceSession.RunOptions, - ): Promise; - - // #endregion - - // #region copy parameters - - /** - * Retrieves the size of all parameters for the training state. Calculates the total number of primitive (datatype of - * the parameters) elements of all the parameters in the training state. 
- *
- * @param trainableOnly - When set to true, the size is calculated for trainable params only. Default value is true.
- */
-  getParametersSize(trainableOnly: boolean): Promise<number>;
-
-  /**
-   * Copies parameter values from the given buffer to the training state. Currently, only supporting models with
-   * parameters of type Float32.
-   *
-   * @param buffer - A Uint8Array representation of Float32 parameters.
-   * @param trainableOnly - True if trainable parameters only to be modified, false otherwise. Default value is true.
-   */
-  loadParametersBuffer(buffer: Uint8Array, trainableOnly: boolean): Promise<void>;
-
-  /**
-   * Copies the model parameters to a contiguous buffer. Usually used in the context of Federated Learning.
-   * Currently, only supporting models with parameters of type Float32.
-   *
-   * @param trainableOnly - When set to true, only trainable parameters are copied. Trainable parameters are parameters
-   * for which requires_grad is set to true. Default value is true.
-   * @returns A promise that resolves to a Float32 OnnxValue of the requested parameters.
-   */
-  getContiguousParameters(trainableOnly: boolean): Promise<OnnxValue>;
-  // #endregion
-
-  // #region release()
-
-  /**
-   * Release the inference session and the underlying resources.
-   */
-  release(): Promise<void>;
-  // #endregion
-
-  // #region metadata
-
-  /**
-   * Get input names of the loaded training model.
-   */
-  readonly trainingInputNames: readonly string[];
-
-  /**
-   * Get output names of the loaded training model.
-   */
-  readonly trainingOutputNames: readonly string[];
-
-  /**
-   * Get input names of the loaded eval model. Is an empty array if no eval model is loaded.
-   */
-  readonly evalInputNames: readonly string[];
-
-  /**
-   * Get output names of the loaded eval model. Is an empty array if no eval model is loaded.
-   */
-  readonly evalOutputNames: readonly string[];
-
-  // #endregion
-}
-
-/**
- * Represents the optional parameters that can be passed into the TrainingSessionFactory.
- */
-export interface TrainingSessionCreateOptions {
-  /**
-   * URI or buffer for a .ckpt file that contains the checkpoint for the training model.
-   */
-  checkpointState: TrainingSession.UriOrBuffer;
-  /**
-   * URI or buffer for the .onnx training file.
-   */
-  trainModel: TrainingSession.UriOrBuffer;
-  /**
-   * Optional. URI or buffer for the .onnx optimizer model file.
-   */
-  optimizerModel?: TrainingSession.UriOrBuffer;
-  /**
-   * Optional. URI or buffer for the .onnx eval model file.
-   */
-  evalModel?: TrainingSession.UriOrBuffer;
-}
-
-/**
- * Defines method overload possibilities for creating a TrainingSession.
- */
-export interface TrainingSessionFactory {
-  // #region create()
-
-  /**
-   * Creates a new TrainingSession and asynchronously loads any models passed in through trainingOptions
-   *
-   * @param trainingOptions specify models and checkpoints to load into the Training Session
-   * @param sessionOptions specify configuration for training session behavior
-   *
-   * @returns Promise that resolves to a TrainingSession object
-   */
-  create(
-    trainingOptions: TrainingSessionCreateOptions,
-    sessionOptions?: InferenceSession.SessionOptions,
-  ): Promise<TrainingSession>;
-
-  // #endregion
-}
-
-// eslint-disable-next-line @typescript-eslint/naming-convention
-export const TrainingSession: TrainingSessionFactory = TrainingSessionImpl;
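Dropping `training-session.ts` (together with its export from `index.ts` above) is a breaking change for anyone importing `TrainingSession` from `onnxruntime-common`; only the inference surface remains. A hedged before/after sketch for downstream code (the model path and single-input assumption are illustrative):

```typescript
// Before this change (no longer compiles -- the export is gone):
// import { TrainingSession } from 'onnxruntime-common';

// After: only the inference API is exported.
import { InferenceSession, Tensor } from 'onnxruntime-common';

async function runModel(modelPath: string, input: Tensor) {
  const session = await InferenceSession.create(modelPath);
  const feeds = { [session.inputNames[0]]: input }; // assumes a single-input model
  return session.run(feeds);
}
```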
diff --git a/js/common/lib/type-helper.ts b/js/common/lib/type-helper.ts
new file mode 100644
index 0000000000000..845ba3018d443
--- /dev/null
+++ b/js/common/lib/type-helper.ts
@@ -0,0 +1,31 @@
+// Copyright (c) Microsoft Corporation. All rights reserved.
+// Licensed under the MIT License.
+
+/**
+ * A helper type to get certain types if they are declared in global scope.
+ *
+ * For example, if you installed "@webgpu/types" as a dev dependency, then `TryGetGlobalType<'GPUDevice'>` will
+ * be type `GPUDevice`; otherwise it will be type `unknown`.
+ *
+ *
+ * We don't want to introduce "@webgpu/types" as a dependency of this package because:
+ *
+ * (1) For JavaScript users, it's not needed. For TypeScript users, they can install it as a dev dependency themselves.
+ *
+ * (2) "@webgpu/types" requires "@types/dom-webcodecs" as a peer dependency when using TypeScript < v5.1, and its
+ * version needs to be chosen carefully according to the TypeScript version being used. This means so far there is no
+ * way to keep every TypeScript version happy. It turns out that we could easily break users on some TypeScript
+ * versions.
+ *
+ * For more info see https://github.com/gpuweb/types/issues/127
+ *
+ * Update (2024-08-07): Reason (2) may no longer be valid. Most people should be using TypeScript >= 5.1 by now.
+ * However, we are still not sure whether introducing "@webgpu/types" as a direct dependency is a good idea. We find
+ * this type helper useful for TypeScript users.
+ *
+ * @ignore
+ */
+export type TryGetGlobalType<Name extends string, Fallback = unknown> = typeof globalThis extends {
+  [k in Name]: { prototype: infer T };
+}
+  ? T
+  : Fallback;
diff --git a/js/common/typedoc.json b/js/common/typedoc.json
index 088c7ba4053e6..f9c7e7b19db41 100644
--- a/js/common/typedoc.json
+++ b/js/common/typedoc.json
@@ -1,6 +1,7 @@
 {
   "entryPoints": ["lib/index.ts"],
   "excludeInternal": true,
+  "intentionallyNotExported": ["TryGetGlobalType"],
   "name": "ONNX Runtime JavaScript API",
   "readme": "none",
   "cleanOutputDir": true
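A small self-contained illustration of how the conditional type resolves (a local copy of the helper; `A`/`B` are illustrative names):

```typescript
// Local copy of the helper for illustration.
type TryGetGlobalType<Name extends string, Fallback = unknown> = typeof globalThis extends {
  [k in Name]: { prototype: infer T };
}
  ? T
  : Fallback;

// 'Uint8Array' is always declared on globalThis, so this resolves to the instance type:
type A = TryGetGlobalType<'Uint8Array'>; // Uint8Array

// Without "@webgpu/types" installed, 'GPUDevice' is not declared, so the fallback applies:
type B = TryGetGlobalType<'GPUDevice', { label: string }>; // { label: string }
```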
"sha512-LhLcE7Hbiryz8oMDdDptSrWowmB4Bl6RCt6sIJKpRB4XtVf0iEgewX3au/pJqm+Py1kCASkb/FFKjxQaLtxJvw==", "dev": true, "requires": { - "follow-redirects": "^1.15.0", + "follow-redirects": "^1.15.6", "form-data": "^4.0.0", "proxy-from-env": "^1.1.0" } @@ -1725,9 +1725,9 @@ "dev": true }, "cross-spawn": { - "version": "7.0.3", - "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz", - "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==", + "version": "7.0.6", + "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz", + "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==", "requires": { "path-key": "^3.1.0", "shebang-command": "^2.0.0", diff --git a/js/node/script/install.js b/js/node/script/install.js index b15bc03840599..fef93f9169a2c 100644 --- a/js/node/script/install.js +++ b/js/node/script/install.js @@ -21,6 +21,7 @@ const os = require('os'); const fs = require('fs'); const path = require('path'); const tar = require('tar'); +const { execFileSync } = require('child_process'); const { Readable } = require('stream'); // commandline flag: @@ -58,10 +59,23 @@ if (NO_INSTALL || !shouldInstall) { // Step.2: Download the required binaries const artifactUrl = { - 11: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-${ - ORT_VERSION - }.tgz`, - 12: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-cuda12-${ + get 11() { + // TODO: support ORT Cuda v11 binaries + throw new Error(`CUDA 11 binaries are not supported by this script yet. + +To use ONNX Runtime Node.js binding with CUDA v11 support, please follow the manual steps: + +1. Use "--onnxruntime-node-install-cuda=skip" to skip the auto installation. +2. Navigate to https://aiinfra.visualstudio.com/PublicPackages/_artifacts/feed/onnxruntime-cuda-11 +3. Download the binaries for your platform and architecture +4. Extract the following binaries to "node_modules/onnxruntime-node/bin/napi-v3/linux/x64: + - libonnxruntime_providers_tensorrt.so + - libonnxruntime_providers_shared.so + - libonnxruntime.so.${ORT_VERSION} + - libonnxruntime_providers_cuda.so +`); + }, + 12: `https://github.com/microsoft/onnxruntime/releases/download/v${ORT_VERSION}/onnxruntime-linux-x64-gpu-${ ORT_VERSION }.tgz`, }[INSTALL_CUDA_FLAG || tryGetCudaVersion()]; @@ -108,9 +122,27 @@ Use "--onnxruntime-node-install-cuda=skip" to skip the installation. You will st function tryGetCudaVersion() { // Should only return 11 or 12. - // TODO: try to get the CUDA version from the system ( `nvcc --version` ) + // try to get the CUDA version from the system ( `nvcc --version` ) + let ver = 12; + try { + const nvccVersion = execFileSync('nvcc', ['--version'], { encoding: 'utf8' }); + const match = nvccVersion.match(/release (\d+)/); + if (match) { + ver = parseInt(match[1]); + if (ver !== 11 && ver !== 12) { + throw new Error(`Unsupported CUDA version: ${ver}`); + } + } + } catch (e) { + if (e?.code === 'ENOENT') { + console.warn('`nvcc` not found. 
diff --git a/js/node/tsconfig.json b/js/node/tsconfig.json
index c154c3e148ed0..0401fb9609ad6 100644
--- a/js/node/tsconfig.json
+++ b/js/node/tsconfig.json
@@ -1,7 +1,8 @@
 {
   "extends": "../tsconfig.json",
   "compilerOptions": {
-    "outDir": "dist"
+    "outDir": "dist",
+    "declaration": true
   },
   "include": ["lib"]
 }
diff --git a/js/package-lock.json b/js/package-lock.json
index 594d0584ad80e..f4401c6e98c75 100644
--- a/js/package-lock.json
+++ b/js/package-lock.json
@@ -1573,9 +1573,9 @@
       "dev": true
     },
     "node_modules/cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
       "dev": true,
       "dependencies": {
         "path-key": "^3.1.0",
@@ -5922,9 +5922,9 @@
       "dev": true
     },
     "cross-spawn": {
-      "version": "7.0.3",
-      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.3.tgz",
-      "integrity": "sha512-iRDPJKUPVEND7dHPO8rkbOnPpyDygcDFtWjpeWNCgy8WP2rXcxXL8TskReQl6OrB2G7+UJrags1q15Fudc7G6w==",
+      "version": "7.0.6",
+      "resolved": "https://registry.npmjs.org/cross-spawn/-/cross-spawn-7.0.6.tgz",
+      "integrity": "sha512-uV2QOWP2nWzsy2aMp8aRibhi9dlzF5Hgh5SHaB9OiTGEyDTiJJyx0uy51QXdyWbtAHNua4XJzUKca3OzKUd3vA==",
       "dev": true,
       "requires": {
         "path-key": "^3.1.0",
diff --git a/js/react_native/android/build.gradle b/js/react_native/android/build.gradle
index 825990eba0fb8..521866ff0f3e2 100644
--- a/js/react_native/android/build.gradle
+++ b/js/react_native/android/build.gradle
@@ -7,7 +7,7 @@ buildscript {
     }
     dependencies {
-        classpath 'com.android.tools.build:gradle:4.1.2'
+        classpath 'com.android.tools.build:gradle:7.4.2'
         // noinspection DifferentKotlinGradleVersion
     }
 }
@@ -221,9 +221,8 @@ dependencies {
     api "com.facebook.react:react-native:" + REACT_NATIVE_VERSION
     api "org.mockito:mockito-core:2.28.2"
 
-    androidTestImplementation "androidx.test:runner:1.1.0"
-    androidTestImplementation "androidx.test:rules:1.1.0"
-
+    androidTestImplementation "androidx.test:runner:1.5.2"
+    androidTestImplementation "androidx.test:rules:1.5.0"
     implementation "junit:junit:4.12"
     androidTestImplementation "com.linkedin.dexmaker:dexmaker-mockito-inline-extended:2.28.1"
diff --git a/js/react_native/android/gradle.properties b/js/react_native/android/gradle.properties
index 465b04d1f5813..8fe6e40d76911 100644
--- a/js/react_native/android/gradle.properties
+++ b/js/react_native/android/gradle.properties
@@ -4,7 +4,7 @@
 # Specifies the JVM arguments used for the daemon process.
 # The setting is particularly useful for tweaking memory settings.
 # Default value: -Xmx1024m -XX:MaxPermSize=256m
-# org.gradle.jvmargs=-Xmx2048m -XX:MaxPermSize=512m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
+org.gradle.jvmargs=-Xmx4096m -XX:+HeapDumpOnOutOfMemoryError -Dfile.encoding=UTF-8
 #
 # When configured, Gradle will run in incubating parallel mode.
 # This option should only be used with decoupled projects.
More details, visit diff --git a/js/react_native/android/gradle/wrapper/gradle-wrapper.jar b/js/react_native/android/gradle/wrapper/gradle-wrapper.jar index 62d4c053550b9..249e5832f090a 100644 Binary files a/js/react_native/android/gradle/wrapper/gradle-wrapper.jar and b/js/react_native/android/gradle/wrapper/gradle-wrapper.jar differ diff --git a/js/react_native/android/gradle/wrapper/gradle-wrapper.properties b/js/react_native/android/gradle/wrapper/gradle-wrapper.properties index 51d930a381f3a..012d6d90445b4 100644 --- a/js/react_native/android/gradle/wrapper/gradle-wrapper.properties +++ b/js/react_native/android/gradle/wrapper/gradle-wrapper.properties @@ -1,6 +1,6 @@ distributionBase=GRADLE_USER_HOME distributionPath=wrapper/dists -distributionSha256Sum=7faa7198769f872826c8ef4f1450f839ec27f0b4d5d1e51bade63667cbccd205 -distributionUrl=https\://services.gradle.org/distributions/gradle-6.8.3-bin.zip +distributionSha256Sum=cb87f222c5585bd46838ad4db78463a5c5f3d336e5e2b98dc7c0c586527351c2 +distributionUrl=https\://services.gradle.org/distributions/gradle-7.5-bin.zip zipStoreBase=GRADLE_USER_HOME zipStorePath=wrapper/dists diff --git a/js/react_native/android/gradlew b/js/react_native/android/gradlew index fbd7c515832da..a69d9cb6c2065 100755 --- a/js/react_native/android/gradlew +++ b/js/react_native/android/gradlew @@ -1,7 +1,7 @@ -#!/usr/bin/env sh +#!/bin/sh # -# Copyright 2015 the original author or authors. +# Copyright © 2015-2021 the original authors. # # Licensed under the Apache License, Version 2.0 (the "License"); # you may not use this file except in compliance with the License. @@ -17,67 +17,101 @@ # ############################################################################## -## -## Gradle start up script for UN*X -## +# +# Gradle start up script for POSIX generated by Gradle. +# +# Important for running: +# +# (1) You need a POSIX-compliant shell to run this script. If your /bin/sh is +# noncompliant, but you have some other compliant shell such as ksh or +# bash, then to run this script, type that shell name before the whole +# command line, like: +# +# ksh Gradle +# +# Busybox and similar reduced shells will NOT work, because this script +# requires all of these POSIX shell features: +# * functions; +# * expansions «$var», «${var}», «${var:-default}», «${var+SET}», +# «${var#prefix}», «${var%suffix}», and «$( cmd )»; +# * compound commands having a testable exit status, especially «case»; +# * various built-in commands including «command», «set», and «ulimit». +# +# Important for patching: +# +# (2) This script targets any POSIX shell, so it avoids extensions provided +# by Bash, Ksh, etc; in particular arrays are avoided. +# +# The "traditional" practice of packing multiple parameters into a +# space-separated string is a well documented source of bugs and security +# problems, so this is (mostly) avoided, by progressively accumulating +# options in "$@", and eventually passing that to Java. +# +# Where the inherited environment variables (DEFAULT_JVM_OPTS, JAVA_OPTS, +# and GRADLE_OPTS) rely on word-splitting, this is performed explicitly; +# see the in-line comments for details. +# +# There are tweaks for specific operating systems such as AIX, CygWin, +# Darwin, MinGW, and NonStop. +# +# (3) This script is generated from the Groovy template +# https://github.com/gradle/gradle/blob/master/subprojects/plugins/src/main/resources/org/gradle/api/internal/plugins/unixStartScript.txt +# within the Gradle project. 
+# +# You can find Gradle at https://github.com/gradle/gradle/. +# ############################################################################## # Attempt to set APP_HOME + # Resolve links: $0 may be a link -PRG="$0" -# Need this for relative symlinks. -while [ -h "$PRG" ] ; do - ls=`ls -ld "$PRG"` - link=`expr "$ls" : '.*-> \(.*\)$'` - if expr "$link" : '/.*' > /dev/null; then - PRG="$link" - else - PRG=`dirname "$PRG"`"/$link" - fi +app_path=$0 + +# Need this for daisy-chained symlinks. +while + APP_HOME=${app_path%"${app_path##*/}"} # leaves a trailing /; empty if no leading path + [ -h "$app_path" ] +do + ls=$( ls -ld "$app_path" ) + link=${ls#*' -> '} + case $link in #( + /*) app_path=$link ;; #( + *) app_path=$APP_HOME$link ;; + esac done -SAVED="`pwd`" -cd "`dirname \"$PRG\"`/" >/dev/null -APP_HOME="`pwd -P`" -cd "$SAVED" >/dev/null + +APP_HOME=$( cd "${APP_HOME:-./}" && pwd -P ) || exit APP_NAME="Gradle" -APP_BASE_NAME=`basename "$0"` +APP_BASE_NAME=${0##*/} # Add default JVM options here. You can also use JAVA_OPTS and GRADLE_OPTS to pass JVM options to this script. DEFAULT_JVM_OPTS='"-Xmx64m" "-Xms64m"' # Use the maximum available, or set MAX_FD != -1 to use that value. -MAX_FD="maximum" +MAX_FD=maximum warn () { echo "$*" -} +} >&2 die () { echo echo "$*" echo exit 1 -} +} >&2 # OS specific support (must be 'true' or 'false'). cygwin=false msys=false darwin=false nonstop=false -case "`uname`" in - CYGWIN* ) - cygwin=true - ;; - Darwin* ) - darwin=true - ;; - MINGW* ) - msys=true - ;; - NONSTOP* ) - nonstop=true - ;; +case "$( uname )" in #( + CYGWIN* ) cygwin=true ;; #( + Darwin* ) darwin=true ;; #( + MSYS* | MINGW* ) msys=true ;; #( + NONSTOP* ) nonstop=true ;; esac CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar @@ -87,9 +121,9 @@ CLASSPATH=$APP_HOME/gradle/wrapper/gradle-wrapper.jar if [ -n "$JAVA_HOME" ] ; then if [ -x "$JAVA_HOME/jre/sh/java" ] ; then # IBM's JDK on AIX uses strange locations for the executables - JAVACMD="$JAVA_HOME/jre/sh/java" + JAVACMD=$JAVA_HOME/jre/sh/java else - JAVACMD="$JAVA_HOME/bin/java" + JAVACMD=$JAVA_HOME/bin/java fi if [ ! -x "$JAVACMD" ] ; then die "ERROR: JAVA_HOME is set to an invalid directory: $JAVA_HOME @@ -98,7 +132,7 @@ Please set the JAVA_HOME variable in your environment to match the location of your Java installation." fi else - JAVACMD="java" + JAVACMD=java which java >/dev/null 2>&1 || die "ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH. Please set the JAVA_HOME variable in your environment to match the @@ -106,80 +140,101 @@ location of your Java installation." fi # Increase the maximum file descriptors if we can. -if [ "$cygwin" = "false" -a "$darwin" = "false" -a "$nonstop" = "false" ] ; then - MAX_FD_LIMIT=`ulimit -H -n` - if [ $? -eq 0 ] ; then - if [ "$MAX_FD" = "maximum" -o "$MAX_FD" = "max" ] ; then - MAX_FD="$MAX_FD_LIMIT" - fi - ulimit -n $MAX_FD - if [ $? -ne 0 ] ; then - warn "Could not set maximum file descriptor limit: $MAX_FD" - fi - else - warn "Could not query maximum file descriptor limit: $MAX_FD_LIMIT" - fi +if ! "$cygwin" && ! "$darwin" && ! 
"$nonstop" ; then + case $MAX_FD in #( + max*) + MAX_FD=$( ulimit -H -n ) || + warn "Could not query maximum file descriptor limit" + esac + case $MAX_FD in #( + '' | soft) :;; #( + *) + ulimit -n "$MAX_FD" || + warn "Could not set maximum file descriptor limit to $MAX_FD" + esac fi -# For Darwin, add options to specify how the application appears in the dock -if $darwin; then - GRADLE_OPTS="$GRADLE_OPTS \"-Xdock:name=$APP_NAME\" \"-Xdock:icon=$APP_HOME/media/gradle.icns\"" -fi +# Collect all arguments for the java command, stacking in reverse order: +# * args from the command line +# * the main class name +# * -classpath +# * -D...appname settings +# * --module-path (only if needed) +# * DEFAULT_JVM_OPTS, JAVA_OPTS, and GRADLE_OPTS environment variables. # For Cygwin or MSYS, switch paths to Windows format before running java -if [ "$cygwin" = "true" -o "$msys" = "true" ] ; then - APP_HOME=`cygpath --path --mixed "$APP_HOME"` - CLASSPATH=`cygpath --path --mixed "$CLASSPATH"` - - JAVACMD=`cygpath --unix "$JAVACMD"` - - # We build the pattern for arguments to be converted via cygpath - ROOTDIRSRAW=`find -L / -maxdepth 1 -mindepth 1 -type d 2>/dev/null` - SEP="" - for dir in $ROOTDIRSRAW ; do - ROOTDIRS="$ROOTDIRS$SEP$dir" - SEP="|" - done - OURCYGPATTERN="(^($ROOTDIRS))" - # Add a user-defined pattern to the cygpath arguments - if [ "$GRADLE_CYGPATTERN" != "" ] ; then - OURCYGPATTERN="$OURCYGPATTERN|($GRADLE_CYGPATTERN)" - fi +if "$cygwin" || "$msys" ; then + APP_HOME=$( cygpath --path --mixed "$APP_HOME" ) + CLASSPATH=$( cygpath --path --mixed "$CLASSPATH" ) + + JAVACMD=$( cygpath --unix "$JAVACMD" ) + # Now convert the arguments - kludge to limit ourselves to /bin/sh - i=0 - for arg in "$@" ; do - CHECK=`echo "$arg"|egrep -c "$OURCYGPATTERN" -` - CHECK2=`echo "$arg"|egrep -c "^-"` ### Determine if an option - - if [ $CHECK -ne 0 ] && [ $CHECK2 -eq 0 ] ; then ### Added a condition - eval `echo args$i`=`cygpath --path --ignore --mixed "$arg"` - else - eval `echo args$i`="\"$arg\"" + for arg do + if + case $arg in #( + -*) false ;; # don't mess with options #( + /?*) t=${arg#/} t=/${t%%/*} # looks like a POSIX filepath + [ -e "$t" ] ;; #( + *) false ;; + esac + then + arg=$( cygpath --path --ignore --mixed "$arg" ) fi - i=`expr $i + 1` + # Roll the args list around exactly as many times as the number of + # args, so each arg winds up back in the position where it started, but + # possibly modified. + # + # NB: a `for` loop captures its iteration list before it begins, so + # changing the positional parameters here affects neither the number of + # iterations, nor the values presented in `arg`. 
+        shift                   # remove old arg
+        set -- "$@" "$arg"      # push replacement arg
     done
-    case $i in
-        0) set -- ;;
-        1) set -- "$args0" ;;
-        2) set -- "$args0" "$args1" ;;
-        3) set -- "$args0" "$args1" "$args2" ;;
-        4) set -- "$args0" "$args1" "$args2" "$args3" ;;
-        5) set -- "$args0" "$args1" "$args2" "$args3" "$args4" ;;
-        6) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" ;;
-        7) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" ;;
-        8) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" ;;
-        9) set -- "$args0" "$args1" "$args2" "$args3" "$args4" "$args5" "$args6" "$args7" "$args8" ;;
-    esac
 fi
-# Escape application args
-save () {
-    for i do printf %s\\n "$i" | sed "s/'/'\\\\''/g;1s/^/'/;\$s/\$/' \\\\/" ; done
-    echo " "
-}
-APP_ARGS=`save "$@"`
+# Collect all arguments for the java command;
+#   * $DEFAULT_JVM_OPTS, $JAVA_OPTS, and $GRADLE_OPTS can contain fragments of
+#     shell script including quotes and variable substitutions, so put them in
+#     double quotes to make sure that they get re-expanded; and
+#   * put everything else in single quotes, so that it's not re-expanded.
+
+set -- \
+        "-Dorg.gradle.appname=$APP_BASE_NAME" \
+        -classpath "$CLASSPATH" \
+        org.gradle.wrapper.GradleWrapperMain \
+        "$@"
+
+# Stop when "xargs" is not available.
+if ! command -v xargs >/dev/null 2>&1
+then
+    die "xargs is not available"
+fi
+
+# Use "xargs" to parse quoted args.
+#
+# With -n1 it outputs one arg per line, with the quotes and backslashes removed.
+#
+# In Bash we could simply go:
+#
+#   readarray ARGS < <( xargs -n1 <<<"$var" ) &&
+#   set -- "${ARGS[@]}" "$@"
+#
+# but POSIX shell has neither arrays nor process substitution (the `<(...)`
+# form above), so instead we post-process each arg (as a line of input to sed)
+# to backslash-escape any character that might be a shell metacharacter, then
+# use eval to reverse that process (while maintaining the separation between
+# arguments), and wrap the whole thing up as a single "set" statement.
+#
+# This will of course break if any of these variables contains a newline or
+# an unmatched quote.
+#
-# Collect all arguments for the java command, following the shell quoting and substitution rules
-eval set -- $DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS "\"-Dorg.gradle.appname=$APP_BASE_NAME\"" -classpath "\"$CLASSPATH\"" org.gradle.wrapper.GradleWrapperMain "$APP_ARGS"
+eval "set -- $(
+        printf '%s\n' "$DEFAULT_JVM_OPTS $JAVA_OPTS $GRADLE_OPTS" |
+        xargs -n1 |
+        sed ' s~[^-[:alnum:]+,./:=@_]~\\&~g; ' |
+        tr '\n' ' '
+    )" '"$@"'
 
 exec "$JAVACMD" "$@"
diff --git a/js/react_native/android/gradlew.bat b/js/react_native/android/gradlew.bat
index 5093609d512a9..f127cfd49d402 100644
--- a/js/react_native/android/gradlew.bat
+++ b/js/react_native/android/gradlew.bat
@@ -14,7 +14,7 @@
 @rem limitations under the License.
 @rem
 
-@if "%DEBUG%" == "" @echo off
+@if "%DEBUG%"=="" @echo off
 @rem ##########################################################################
 @rem
 @rem  Gradle startup script for Windows
 @rem
 @rem ##########################################################################
@@ -25,7 +25,7 @@ if "%OS%"=="Windows_NT" setlocal
 
 set DIRNAME=%~dp0
-if "%DIRNAME%" == "" set DIRNAME=.
+if "%DIRNAME%"=="" set DIRNAME=.
 set APP_BASE_NAME=%~n0
 set APP_HOME=%DIRNAME%
@@ -40,7 +40,7 @@ if defined JAVA_HOME goto findJavaFromJavaHome
 
 set JAVA_EXE=java.exe
 %JAVA_EXE% -version >NUL 2>&1
-if "%ERRORLEVEL%" == "0" goto init
+if %ERRORLEVEL% equ 0 goto execute
 
 echo.
 echo ERROR: JAVA_HOME is not set and no 'java' command could be found in your PATH.
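@rem [Editor's note] The switch just above from `if "%ERRORLEVEL%" == "0"` to
@rem `if %ERRORLEVEL% equ 0` replaces a string comparison with a numeric one.
@rem A minimal sketch of the difference (hypothetical snippet, not part of
@rem this patch):
@rem
@rem   cmd /c exit 0
@rem   if %ERRORLEVEL% equ 0 echo printed: equ parses both sides as numbers
@rem   if "%ERRORLEVEL%"=="00" echo not printed: string compare is literal
@rem
@rem With `equ`, values such as 0 and 00 compare equal, so the check is robust
@rem to how the exit code happens to be formatted.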
@@ -54,7 +54,7 @@ goto fail set JAVA_HOME=%JAVA_HOME:"=% set JAVA_EXE=%JAVA_HOME%/bin/java.exe -if exist "%JAVA_EXE%" goto init +if exist "%JAVA_EXE%" goto execute echo. echo ERROR: JAVA_HOME is set to an invalid directory: %JAVA_HOME% @@ -64,21 +64,6 @@ echo location of your Java installation. goto fail -:init -@rem Get command-line arguments, handling Windows variants - -if not "%OS%" == "Windows_NT" goto win9xME_args - -:win9xME_args -@rem Slurp the command line arguments. -set CMD_LINE_ARGS= -set _SKIP=2 - -:win9xME_args_slurp -if "x%~1" == "x" goto execute - -set CMD_LINE_ARGS=%* - :execute @rem Setup the command line @@ -86,17 +71,19 @@ set CLASSPATH=%APP_HOME%\gradle\wrapper\gradle-wrapper.jar @rem Execute Gradle -"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %CMD_LINE_ARGS% +"%JAVA_EXE%" %DEFAULT_JVM_OPTS% %JAVA_OPTS% %GRADLE_OPTS% "-Dorg.gradle.appname=%APP_BASE_NAME%" -classpath "%CLASSPATH%" org.gradle.wrapper.GradleWrapperMain %* :end @rem End local scope for the variables with windows NT shell -if "%ERRORLEVEL%"=="0" goto mainEnd +if %ERRORLEVEL% equ 0 goto mainEnd :fail rem Set variable GRADLE_EXIT_CONSOLE if you need the _script_ return code instead of rem the _cmd.exe /c_ return code! -if not "" == "%GRADLE_EXIT_CONSOLE%" exit 1 -exit /b 1 +set EXIT_CODE=%ERRORLEVEL% +if %EXIT_CODE% equ 0 set EXIT_CODE=1 +if not ""=="%GRADLE_EXIT_CONSOLE%" exit %EXIT_CODE% +exit /b %EXIT_CODE% :mainEnd if "%OS%"=="Windows_NT" endlocal diff --git a/js/react_native/e2e/android/app/build.gradle b/js/react_native/e2e/android/app/build.gradle index 8a84b0d5065a8..526259e3f8d8f 100644 --- a/js/react_native/e2e/android/app/build.gradle +++ b/js/react_native/e2e/android/app/build.gradle @@ -193,7 +193,7 @@ dependencies { implementation "com.facebook.react:react-native:+" // From node_modules implementation "androidx.swiperefreshlayout:swiperefreshlayout:1.0.0" - implementation 'androidx.test.ext:junit:1.1.3' + implementation 'androidx.test.ext:junit:1.1.5' debugImplementation("com.facebook.flipper:flipper:${FLIPPER_VERSION}") { exclude group:'com.facebook.fbjni' } @@ -213,9 +213,9 @@ dependencies { implementation jscFlavor } - androidTestImplementation 'androidx.test.espresso:espresso-core:3.4.0' - androidTestImplementation 'androidx.test:runner:1.4.0' - androidTestImplementation 'androidx.test:rules:1.4.0' + androidTestImplementation "androidx.test.espresso:espresso-core:3.5.0" + androidTestImplementation "androidx.test:runner:1.5.2" + androidTestImplementation "androidx.test:rules:1.5.0" implementation project(':onnxruntime-react-native') // specify ORT dependency here so it can be found in libs flatDir repository diff --git a/js/web/docs/webgpu-operators.md b/js/web/docs/webgpu-operators.md index f63cf17aa4df3..5c8748d75c2bc 100644 --- a/js/web/docs/webgpu-operators.md +++ b/js/web/docs/webgpu-operators.md @@ -50,6 +50,7 @@ Do not modify directly.* | Gather | ai.onnx(1-10,11-12,13+) | | | GatherBlockQuantized | com.microsoft(1+) | | | GatherElements | ai.onnx(11-12,13+) | | +| GatherND | ai.onnx(11,12,13+) | | | Gelu | ai.onnx(20+); com.microsoft(1+) | | | Gemm | ai.onnx(7-8,9-10,11-12,13+) | | | GlobalAveragePool | ai.onnx(1+); com.ms.internal.nhwc(1+) | | diff --git a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts index 28af5d461abe0..6c7afbc7365bb 100644 --- a/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts +++ 
b/js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts @@ -16,6 +16,7 @@ import { einsum, parseEinsumAttributes } from './ops/einsum'; import { expand } from './ops/expand'; import { fastGelu } from './ops/fast-gelu'; import { gather, parseGatherAttributes } from './ops/gather'; +import { gatherND, parseGatherNDAttributes } from './ops/gather-nd'; import { gatherBlockQuantized, parseGatherBlockQuantizedAttributes } from './ops/gather-block-quantized'; import { gatherElements, parseGatherElementsAttributes } from './ops/gather-elements'; import { gemm, parseGemmAttributes } from './ops/gemm'; @@ -100,6 +101,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map = new ['Gather', [gather, parseGatherAttributes]], ['GatherElements', [gatherElements, parseGatherElementsAttributes]], ['GatherBlockQuantized', [gatherBlockQuantized, parseGatherBlockQuantizedAttributes]], + ['GatherND', [gatherND, parseGatherNDAttributes]], ['Gelu', [unaryOps.gelu]], ['Gemm', [gemm, parseGemmAttributes]], ['GlobalAveragePool', [pool.globalAveragePool, pool.parseGlobalAveragePoolAttributes]], diff --git a/js/web/lib/wasm/jsep/webgpu/ops/gather-nd.ts b/js/web/lib/wasm/jsep/webgpu/ops/gather-nd.ts new file mode 100644 index 0000000000000..43b51f6e94a66 --- /dev/null +++ b/js/web/lib/wasm/jsep/webgpu/ops/gather-nd.ts @@ -0,0 +1,179 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +import { DataType } from '../../../wasm-common'; +import { TensorView } from '../../tensor-view'; +import { ShapeUtil } from '../../util'; +import { AttributeWithCacheKey } from '../attribute-with-cache-key'; +import { ComputeContext, ProgramUniform } from '../types'; + +import { createTensorShapeVariables, inputVariable, outputVariable, ShaderHelper, UniformsArrayType } from './common'; + +export interface GatherNDAttributes extends AttributeWithCacheKey { + readonly batchDims: number; +} + +const computeSliceOffsets = ( + context: ComputeContext, + indicesData: TensorView, + sizesFromSliceDimsData: number[], + batchDims: number, + inputDims: readonly number[], + numSlices: number, + numSlicesPerBatch: number, + inputBatchStride: number, + numSliceDims: number, +) => { + const programUniforms: ProgramUniform[] = [ + { type: DataType.uint32, data: numSlices }, + { type: DataType.uint32, data: batchDims }, + { type: DataType.uint32, data: inputDims }, + { type: DataType.uint32, data: sizesFromSliceDimsData }, + { type: DataType.uint32, data: numSlicesPerBatch }, + { type: DataType.uint32, data: inputBatchStride }, + { type: DataType.uint32, data: numSliceDims }, + ]; + + const outputShape = [numSlices]; + programUniforms.push(...createTensorShapeVariables(indicesData.dims, outputShape)); + + const getShaderSource = (shaderHelper: ShaderHelper) => { + const indices = inputVariable('indices_data', indicesData.dataType, indicesData.dims.length); + const output = outputVariable('input_slice_offsets_data', DataType.uint32, 1, 1); + const variables = [indices, output]; + const uniforms: UniformsArrayType = [ + { name: 'output_size', type: 'u32' }, + { name: 'batch_dims', type: 'u32' }, + { name: 'input_dims', type: 'u32', length: inputDims.length }, + { name: 'sizes_from_slice_dims_data', type: 'u32', length: sizesFromSliceDimsData.length }, + { name: 'num_slices_per_batch', type: 'u32' }, + { name: 'input_batch_stride', type: 'u32' }, + { name: 'num_slice_dims', type: 'u32' }, + ]; + return ` + ${shaderHelper.registerUniforms(uniforms).declareVariables(...variables)} + ${shaderHelper.mainStart()} + 
${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')} + let batch_idx = global_idx / uniforms.num_slices_per_batch; + let base_offset = batch_idx * uniforms.input_batch_stride; + + let slice_indices_base_offset = global_idx * uniforms.num_slice_dims; + var relative_slice_offset = 0; + for (var dim_idx = 0u; dim_idx < uniforms.num_slice_dims; dim_idx ++) { + var index = i32(indices_data[dim_idx + slice_indices_base_offset].x); + let input_dim_idx = uniforms.batch_dims + dim_idx; + if (index < 0) { + ${ + inputDims.length === 1 + ? 'index += i32(uniforms.input_dims);' + : 'index += i32(uniforms.input_dims[input_dim_idx]);' + } + } + ${ + sizesFromSliceDimsData.length === 1 + ? 'relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data);' + : 'relative_slice_offset += index * i32(uniforms.sizes_from_slice_dims_data[dim_idx]);' + } + } + + input_slice_offsets_data[global_idx] = base_offset + u32(relative_slice_offset); + }`; + }; + + return context.compute( + { + name: 'computeSliceOffsets', + shaderCache: { hint: `${inputDims.length}_${sizesFromSliceDimsData.length}`, inputDependencies: ['rank'] }, + getRunData: () => ({ + outputs: [{ dims: outputShape, dataType: context.inputs[1].dataType }], + dispatchGroup: { x: Math.ceil(numSlices / 64) }, + programUniforms, + }), + getShaderSource, + }, + { inputs: [indicesData], outputs: [-1] }, + )[0]; +}; + +export const gatherND = (context: ComputeContext, attributes: GatherNDAttributes) => { + const inputs = context.inputs; + const inputShape = inputs[0].dims; + const inputType = inputs[0].dataType; + const indicesShape = inputs[1].dims; + const numSliceDims = indicesShape[indicesShape.length - 1]; + const numSlices = ShapeUtil.sizeToDimension(indicesShape, indicesShape.length - 1); + const sliceSize = ShapeUtil.sizeFromDimension(inputShape, attributes.batchDims + numSliceDims); + const numBatches = ShapeUtil.sizeToDimension(inputShape, attributes.batchDims); + const inputBatchStride = ShapeUtil.sizeFromDimension(inputShape, attributes.batchDims); + const numSlicesPerBatch = numSlices / numBatches; + const sizesFromSliceDims = new Array(numSliceDims); + let runningProduct = sliceSize; + for (let i = 0; i < numSliceDims; ++i) { + sizesFromSliceDims[numSliceDims - 1 - i] = runningProduct; + runningProduct *= inputShape[attributes.batchDims + numSliceDims - 1 - i]; + } + + const inputSliceOffsets = computeSliceOffsets( + context, + inputs[1], + sizesFromSliceDims, + attributes.batchDims, + inputShape, + numSlices, + numSlicesPerBatch, + inputBatchStride, + numSliceDims, + ); + + const lastIndicesDimension = attributes.batchDims + numSliceDims; + if (lastIndicesDimension > inputShape.length) { + throw new Error('last dimension of indices must not be larger than rank of input tensor'); + } + + const outputShape = indicesShape.slice(0, -1).concat(inputShape.slice(lastIndicesDimension)); + const outputSize = ShapeUtil.size(outputShape); + + const programUniforms: ProgramUniform[] = [ + { type: DataType.uint32, data: outputSize }, + { type: DataType.uint32, data: sliceSize }, + ...createTensorShapeVariables(inputs[0].dims, inputSliceOffsets.dims, outputShape), + ]; + + const getShaderSource = (shaderHelper: ShaderHelper) => { + const input = inputVariable('data', inputs[0].dataType, inputs[0].dims.length); + const indices = inputVariable('slice_offsets', DataType.uint32, inputSliceOffsets.dims.length); + + const output = outputVariable('output', inputs[0].dataType, outputShape.length); + return ` + ${shaderHelper + 
.registerUniform('output_size', 'u32')
+          .registerUniform('slice_size', 'u32')
+          .declareVariables(input, indices, output)}
+        ${shaderHelper.mainStart()}
+        ${shaderHelper.guardAgainstOutOfBoundsWorkgroupSizes('uniforms.output_size')}
+        let slice_offset = slice_offsets[global_idx / uniforms.slice_size];
+        output[global_idx] = data[u32(slice_offset) + global_idx % uniforms.slice_size];
+      }`;
+  };
+  context.compute(
+    {
+      name: 'GatherND',
+      shaderCache: { hint: attributes.cacheKey, inputDependencies: ['rank', 'rank'] },
+      getRunData: () => ({
+        outputs: [{ dims: outputShape, dataType: inputType }],
+        dispatchGroup: { x: Math.ceil(outputSize / 64 /* workgroup size */) },
+        programUniforms,
+      }),
+      getShaderSource,
+    },
+    { inputs: [inputs[0], inputSliceOffsets] },
+  );
+};
+
+export const parseGatherNDAttributes = (attributes: Record): GatherNDAttributes => {
+  const batchDims = attributes.batch_dims as number;
+  return {
+    batchDims,
+    cacheKey: '',
+  };
+};
diff --git a/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts b/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
index 21225a77b189b..5059645211aea 100644
--- a/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
+++ b/js/web/lib/wasm/jsep/webgpu/ops/transpose.ts
@@ -29,7 +29,9 @@ const permFunctionBody = (perm: number[], rank: number, input: IndicesHelper, ou
   let reverseFunc = `fn perm(i: ${output.type.indices}) -> ${input.type.indices} {
     var a: ${input.type.indices};`;
   for (let i = 0; i < rank; ++i) {
-    reverseFunc += input.indicesSet('a', perm[i], `i[${i}]`);
+    // input and output indices here always have rank >= 2 (rank < 2 transposes
+    // are handled as reshapes), so indexing `a` and `i` directly is always valid.
+    reverseFunc += `a[${perm[i]}]=i[${i}];`;
   }
   return (reverseFunc += 'return a;}');
 };
@@ -71,7 +73,7 @@ export const createTransposeProgramInfo = (inputTensor: TensorView, permAttr: nu
   const outputShape = getOutputShape(inputTensor.dims, perm);
   let newInputShape = inputTensor.dims;
   let newOutputShape = outputShape;
-  const transposeAsReshape = isTransposeReshape(perm, inputTensor.dims);
+  const transposeAsReshape = inputRank < 2 || isTransposeReshape(perm, inputTensor.dims);
   let getShaderSource;
   if (transposeAsReshape) {
     getShaderSource = (shaderHelper: ShaderHelper) => {
diff --git a/js/web/lib/wasm/wasm-core-impl.ts b/js/web/lib/wasm/wasm-core-impl.ts
index 81d1b73efc9d4..da8939cd0263a 100644
--- a/js/web/lib/wasm/wasm-core-impl.ts
+++ b/js/web/lib/wasm/wasm-core-impl.ts
@@ -487,7 +487,7 @@ export const prepareInputOutputTensor = (
     }
 
     if (location === 'gpu-buffer') {
-      const gpuBuffer = tensor[2].gpuBuffer as GPUBuffer;
+      const gpuBuffer = tensor[2].gpuBuffer;
       dataByteLength = calculateTensorSizeInBytes(tensorDataTypeStringToEnum(dataType), dims)!;
 
       const registerBuffer = wasm.jsepRegisterBuffer;
diff --git a/js/web/package.json b/js/web/package.json
index 656cd7b56b039..181d6127f5455 100644
--- a/js/web/package.json
+++ b/js/web/package.json
@@ -83,7 +83,7 @@
       "types": "./types.d.ts"
     },
     "./wasm": {
-      "import": "./dist/ort.wasm.min.mjs",
+      "import": "./dist/ort.wasm.bundle.min.mjs",
       "require": "./dist/ort.wasm.min.js",
       "types": "./types.d.ts"
     },
diff --git a/js/web/script/build.ts b/js/web/script/build.ts
index 408f9e00a5cbd..529e9d1065e69 100644
--- a/js/web/script/build.ts
+++ b/js/web/script/build.ts
@@ -591,14 +591,14 @@ async function main() {
   // ort[.min].[m]js
   await addAllWebBuildTasks({
     outputName: 'ort',
-    define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true' },
+    define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true' },
   });
   //
ort.bundle.min.mjs await buildOrt({ isProduction: true, outputName: 'ort.bundle', format: 'esm', - define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true', 'BUILD_DEFS.DISABLE_DYNAMIC_IMPORT': 'true' }, + define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_WEBGL': 'true', 'BUILD_DEFS.DISABLE_DYNAMIC_IMPORT': 'true' }, }); // ort.webgpu[.min].[m]js @@ -619,6 +619,13 @@ async function main() { outputName: 'ort.wasm', define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true', 'BUILD_DEFS.DISABLE_WEBGL': 'true' }, }); + // ort.wasm.bundle.min.mjs + await buildOrt({ + isProduction: true, + outputName: 'ort.wasm.bundle', + format: 'esm', + define: { ...DEFAULT_DEFINE, 'BUILD_DEFS.DISABLE_JSEP': 'true', 'BUILD_DEFS.DISABLE_WEBGL': 'true' }, + }); // ort.webgl[.min].[m]js await addAllWebBuildTasks({ outputName: 'ort.webgl', diff --git a/js/web/test/data/ops/gather-nd.jsonc b/js/web/test/data/ops/gather-nd.jsonc new file mode 100644 index 0000000000000..209c7d1f74087 --- /dev/null +++ b/js/web/test/data/ops/gather-nd.jsonc @@ -0,0 +1,147 @@ +[ + { + "name": "GatherND int32", + "operator": "GatherND", + "attributes": [], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [100, 101, 102, 777, 778, 779, 1000, 1001, 1002], + "dims": [9], + "type": "int32" + }, + { + "data": [0, 4, 8], + "dims": [3, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [100, 778, 1002], + "dims": [3], + "type": "int32" + } + ] + } + ] + }, + { + "name": "GatherND float32", + "operator": "GatherND", + "attributes": [], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [100.1, 101.2, 102.3, 777.4, 778.5, 779.6, 1000.7, 1001.8, 1002.9], + "dims": [9], + "type": "float32" + }, + { + "data": [0, 4, 8], + "dims": [3, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [100.0999984741211, 778.5, 1002.9000244140625], + "dims": [3], + "type": "float32" + } + ] + } + ] + }, + { + "name": "GatherND int32 [2 2 2], batch_dims", + "operator": "GatherND", + "attributes": [{ "name": "batch_dims", "data": 1, "type": "int" }], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [0, 1, 2, 3, 4, 5, 6, 7], + "dims": [2, 2, 2], + "type": "int32" + }, + { + "data": [1, 0], + "dims": [2, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [2, 3, 4, 5], + "dims": [2, 2], + "type": "int32" + } + ] + } + ] + }, + { + "name": "GatherND float16", + "operator": "GatherND", + "attributes": [], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [100.1, 101.2, 102.3, 777.4, 778.5, 779.6, 1000.7, 1001.8, 1002.9], + "dims": [9], + "type": "float16" + }, + { + "data": [0, 4, 8], + "dims": [3, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [100.0999984741211, 778.5, 1002.9000244140625], + "dims": [3], + "type": "float16" + } + ] + } + ] + }, + { + "name": "GatherND uint32 [2 2 2], batch_dims", + "operator": "GatherND", + "attributes": [{ "name": "batch_dims", "data": 1, "type": "int" }], + "cases": [ + { + "name": "data[4] indices[]", + "inputs": [ + { + "data": [0, 1, 2, 3, 4, 5, 6, 7], + "dims": [2, 2, 2], + "type": "uint32" + }, + { + "data": [1, 0], + "dims": [2, 1], + "type": "int64" + } + ], + "outputs": [ + { + "data": [2, 3, 4, 5], + "dims": [2, 2], + "type": "uint32" + } + ] + } + ] + } +] diff --git a/js/web/test/e2e/browser-test-wasm-binary-override.js b/js/web/test/e2e/browser-test-wasm-binary-override.js index 471c26f6990b5..27cce2ca06236 100644 --- 
a/js/web/test/e2e/browser-test-wasm-binary-override.js +++ b/js/web/test/e2e/browser-test-wasm-binary-override.js @@ -7,7 +7,7 @@ const documentUrl = document.currentScript.src; it('Browser E2E testing - WebAssembly backend', async function () { // preload .wasm file binary - const wasmUrl = new URL('./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.wasm', documentUrl).href; + const wasmUrl = new URL('./node_modules/onnxruntime-web/dist/ort-wasm-simd-threaded.jsep.wasm', documentUrl).href; const response = await fetch(wasmUrl); // make sure the .wasm file is loaded successfully diff --git a/js/web/test/e2e/browser-test-wasm-path-override-filename-jsep.js b/js/web/test/e2e/browser-test-wasm-path-override-filename-jsep.js new file mode 100644 index 0000000000000..d325a5ca7187d --- /dev/null +++ b/js/web/test/e2e/browser-test-wasm-path-override-filename-jsep.js @@ -0,0 +1,28 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +'use strict'; + +it('Browser E2E testing - WebAssembly backend (path override filename)', async function () { + // check base URL port from test args + if (typeof __ort_arg_port === 'undefined') { + throw new Error('test flag --port= is required'); + } + const base = `http://localhost:${__ort_arg_port}/`; + + ort.env.wasm.wasmPaths = {}; + + if (typeof __ort_arg_files === 'string' && __ort_arg_files.includes('wasm')) { + const overrideWasmUrl = new URL('./test-wasm-path-override/jsep-renamed.wasm', base).href; + console.log(`ort.env.wasm.wasmPaths['wasm'] = ${JSON.stringify(overrideWasmUrl)};`); + ort.env.wasm.wasmPaths.wasm = overrideWasmUrl; + } + + if (typeof __ort_arg_files === 'string' && __ort_arg_files.includes('mjs')) { + const overrideMjsUrl = new URL('./test-wasm-path-override/jsep-renamed.mjs', base).href; + console.log(`ort.env.wasm.wasmPaths['mjs'] = ${JSON.stringify(overrideMjsUrl)};`); + ort.env.wasm.wasmPaths.mjs = overrideMjsUrl; + } + + await testFunction(ort, { executionProviders: ['wasm'] }); +}); diff --git a/js/web/test/e2e/run-data.js b/js/web/test/e2e/run-data.js index 04079b042bc23..dbc3ca0bd2460 100644 --- a/js/web/test/e2e/run-data.js +++ b/js/web/test/e2e/run-data.js @@ -14,7 +14,7 @@ const NODEJS_TEST_CASES = [ // [test_for_same_origin, test_for_cross_origin, main_js, ort_main_js, [test_args]] const BROWSER_TEST_CASES = [ // IIFE - [true, true, './browser-test-webgl.js', 'ort.min.js'], // webgl + [true, true, './browser-test-webgl.js', 'ort.all.min.js'], // webgl [true, true, './browser-test-webgl.js', 'ort.webgl.min.js'], // webgl [true, true, './browser-test-wasm.js', 'ort.wasm.min.js'], // wasm, ort.wasm [true, true, './browser-test-wasm-multi-session-create.js', 'ort.min.js'], // wasm, multi-session create @@ -24,7 +24,7 @@ const BROWSER_TEST_CASES = [ [true, true, './browser-test-wasm.js', 'ort.min.js', ['num_threads=1', 'proxy=1']], // wasm, 1 thread, proxy // ort.min.mjs - [true, true, './browser-test-webgl.js', 'ort.min.mjs'], // webgl + [true, true, './browser-test-webgl.js', 'ort.webgl.min.mjs'], // webgl [true, true, './browser-test-wasm.js', 'ort.min.mjs', ['num_threads=1']], // wasm, 1 thread [true, true, './browser-test-wasm.js', 'ort.min.mjs', ['num_threads=2']], // wasm, 2 threads [true, true, './browser-test-wasm.js', 'ort.min.mjs', ['num_threads=2', 'proxy=1']], // wasm, 2 threads, proxy @@ -41,22 +41,22 @@ const BROWSER_TEST_CASES = [ // path override: // wasm, path override filenames for both mjs and wasm, same origin - [true, false, 
'./browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=9876', 'files=mjs,wasm']], + [true, false, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=9876', 'files=mjs,wasm']], [true, false, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=9876', 'files=mjs,wasm']], // wasm, path override filenames for both mjs and wasm, cross origin - [false, true, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=8081', 'files=mjs,wasm']], + [false, true, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=8081', 'files=mjs,wasm']], [false, true, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=8081', 'files=mjs,wasm']], // wasm, path override filename for wasm, same origin - [true, false, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=9876', 'files=wasm']], + [true, false, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=9876', 'files=wasm']], [true, false, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=9876', 'files=wasm']], // wasm, path override filename for wasm, cross origin - [false, true, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=8081', 'files=wasm']], + [false, true, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=8081', 'files=wasm']], [false, true, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=8081', 'files=wasm']], // wasm, path override filename for mjs, same origin - [true, false, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=9876', 'files=mjs']], + [true, false, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=9876', 'files=mjs']], [true, false, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=9876', 'files=mjs']], // wasm, path override filename for mjs, cross origin - [false, true, './browser-test-wasm-path-override-filename.js', 'ort.min.js', ['port=8081', 'files=mjs']], + [false, true, './browser-test-wasm-path-override-filename-jsep.js', 'ort.min.js', ['port=8081', 'files=mjs']], [false, true, './browser-test-wasm-path-override-filename.js', 'ort.wasm.min.js', ['port=8081', 'files=mjs']], // wasm, path override prefix, same origin [true, false, './browser-test-wasm-path-override-prefix.js', 'ort.min.js', ['port=9876']], diff --git a/js/web/test/e2e/run.js b/js/web/test/e2e/run.js index 93f9d4a144bf2..3361bbece64ed 100644 --- a/js/web/test/e2e/run.js +++ b/js/web/test/e2e/run.js @@ -146,6 +146,10 @@ function prepareWasmPathOverrideFiles() { fs.copyFileSync(`${sourceFile}.wasm`, path.join(folder, 'ort-wasm-simd-threaded.wasm')); fs.copyFileSync(`${sourceFile}.mjs`, path.join(folder, 'renamed.mjs')); fs.copyFileSync(`${sourceFile}.wasm`, path.join(folder, 'renamed.wasm')); + fs.copyFileSync(`${sourceFile}.jsep.mjs`, path.join(folder, 'ort-wasm-simd-threaded.jsep.mjs')); + fs.copyFileSync(`${sourceFile}.jsep.wasm`, path.join(folder, 'ort-wasm-simd-threaded.jsep.wasm')); + fs.copyFileSync(`${sourceFile}.jsep.mjs`, path.join(folder, 'jsep-renamed.mjs')); + fs.copyFileSync(`${sourceFile}.jsep.wasm`, path.join(folder, 'jsep-renamed.wasm')); } async function testAllNodejsCases() { diff --git a/js/web/test/suite-test-list.jsonc b/js/web/test/suite-test-list.jsonc index 45fb771ee13bb..f179756967d49 100644 --- a/js/web/test/suite-test-list.jsonc +++ b/js/web/test/suite-test-list.jsonc @@ -1365,6 +1365,7 @@ "gather.jsonc", 
"gather-block-quantized.jsonc", "gather-elements.jsonc", + "gather-nd.jsonc", "gemm.jsonc", "global-average-pool.jsonc", "greater.jsonc", diff --git a/objectivec/error_utils.mm b/objectivec/error_utils.mm index 335cf8894d549..e8d4d5bb365c9 100644 --- a/objectivec/error_utils.mm +++ b/objectivec/error_utils.mm @@ -11,7 +11,7 @@ void ORTSaveCodeAndDescriptionToError(int code, const char* descriptionCstr, NSE if (!error) return; NSString* description = [NSString stringWithCString:descriptionCstr - encoding:NSASCIIStringEncoding]; + encoding:NSUTF8StringEncoding]; *error = [NSError errorWithDomain:kOrtErrorDomain code:code diff --git a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc index 29f328264bf3f..31f95ee64df5d 100644 --- a/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc +++ b/onnxruntime/contrib_ops/webgpu/quantization/matmul_nbits.cc @@ -1,6 +1,8 @@ // Copyright (c) Microsoft Corporation. All rights reserved. // Licensed under the MIT License. +#include + #include "contrib_ops/webgpu/quantization/matmul_nbits.h" #include "contrib_ops/webgpu/webgpu_contrib_kernels.h" #include "core/providers/cpu/math/matmul_helper.h" @@ -352,8 +354,11 @@ Status MatMulNBits::ComputeInternal(onnxruntime::webgpu::ComputeContext& context const uint32_t components_a = GetMaxComponents(K); const uint32_t components_b = GetMaxComponents(blob_size_in_words); uint32_t components = GetMaxComponents(N); - const bool is_intel = !std::strcmp(context.AdapterInfo().vendor, "intel") && !std::strcmp(context.AdapterInfo().architecture, "gen-12lp"); - const bool use_block32 = is_intel && block_size == 32; + + // Use block32 for Intel Gen12LP architecture. + const bool use_block32 = context.AdapterInfo().vendor == std::string_view{"intel"} && + context.AdapterInfo().architecture == std::string_view{"gen-12lp"} && + block_size == 32; const bool has_zero_points = zero_points != nullptr; // TODO: Support output_number > 1. Some cases are failed when output_number > 1. // const uint32_t output_number = M > 1 && (N / components) % 2 == 0 ? 
2 : 1; diff --git a/onnxruntime/core/providers/js/js_execution_provider.cc b/onnxruntime/core/providers/js/js_execution_provider.cc index c3c99c7d6855a..c1a8b373bed84 100644 --- a/onnxruntime/core/providers/js/js_execution_provider.cc +++ b/onnxruntime/core/providers/js/js_execution_provider.cc @@ -341,6 +341,10 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, Gat class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, GatherElements); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, GatherElements); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 11, GatherND); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 12, 12, GatherND); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 13, GatherND); + class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 1, 9, Slice); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 10, 10, Slice); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kJsExecutionProvider, kOnnxDomain, 11, 12, Slice); @@ -667,6 +671,10 @@ std::unique_ptr RegisterKernels() { BuildKernelCreateInfo, BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/js/operators/gather_nd.cc b/onnxruntime/core/providers/js/operators/gather_nd.cc new file mode 100644 index 0000000000000..ee69100cc658e --- /dev/null +++ b/onnxruntime/core/providers/js/operators/gather_nd.cc @@ -0,0 +1,41 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/js/js_kernel.h" +#include "core/providers/js/js_data_types.h" +#include "gather_nd.h" + +namespace onnxruntime { +namespace js { + +ONNX_OPERATOR_KERNEL_EX( + GatherND, + kOnnxDomain, + 13, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", JsepSupportedDataTypes()), + GatherND); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + GatherND, + kOnnxDomain, + 12, + 12, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", JsepSupportedDataTypes()), + GatherND); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + GatherND, + kOnnxDomain, + 11, + 11, + kJsExecutionProvider, + (*KernelDefBuilder::Create()) + .TypeConstraint("T", JsepSupportedDataTypes()), + GatherND); + +} // namespace js +} // namespace onnxruntime diff --git a/onnxruntime/core/providers/js/operators/gather_nd.h b/onnxruntime/core/providers/js/operators/gather_nd.h new file mode 100644 index 0000000000000..cdf7a52630dad --- /dev/null +++ b/onnxruntime/core/providers/js/operators/gather_nd.h @@ -0,0 +1,24 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
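// [Editor's note] A worked example of the GatherND semantics this kernel exposes,
// mirroring the gather-nd.jsonc test case added above: with batch_dims = 1, data
// of shape [2, 2, 2] holding [0, 1, 2, 3, 4, 5, 6, 7], and indices of shape
// [2, 1] holding [[1], [0]], each batch b gathers the slice data[b][indices[b][0]],
// so the output is [[2, 3], [4, 5]] with shape [2, 2].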
+
+#pragma once
+
+#include "core/providers/js/js_kernel.h"
+
+namespace onnxruntime {
+namespace js {
+
+class GatherND : public JsKernel {
+ public:
+  GatherND(const OpKernelInfo& info) : JsKernel(info) {
+    int64_t batchDims = info.GetAttrOrDefault("batch_dims", 0);
+
+    JSEP_INIT_KERNEL_ATTRIBUTE(GatherND, ({
+                                 "batch_dims" : Number($1),
+                               }),
+                               static_cast(batchDims));
+  }
+};
+
+}  // namespace js
+}  // namespace onnxruntime
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
index d979d53347c4f..1b432dad44263 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc
@@ -1726,8 +1726,10 @@ TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProv
   }
 
   trt_version_ = getInferLibVersion();
+  CUDA_CALL_THROW(cudaRuntimeGetVersion(&cuda_version_));
 
   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT version is " << trt_version_;
+  LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] CUDA version is " << cuda_version_;
 
   LOGS_DEFAULT(VERBOSE) << "[TensorRT EP] TensorRT provider options: "
                         << "device_id: " << device_id_
@@ -2466,13 +2468,13 @@ TensorrtExecutionProvider::GetCapability(const GraphViewer& graph,
   // So, simply return the ComputeCapability here.
   if (graph.NumberOfNodes() == 1 && GraphHasCtxNode(graph)) {
     SubGraph_t supported_node_vector = {{0}, true};
-    std::unique_ptr sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph), 0);
+    std::unique_ptr sub_graph = GetSubGraph(supported_node_vector, graph, TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_)), 0);
     result.push_back(ComputeCapability::Create(std::move(sub_graph)));
     return result;
   }
 
   // Generate unique kernel name for TRT graph
-  HashValue model_hash = TRTGenerateId(graph);
+  HashValue model_hash = TRTGenerateId(graph, std::to_string(trt_version_), std::to_string(cuda_version_));
 
   // Get supported node list from TensorRT parser
   const int number_of_ort_nodes = graph.NumberOfNodes();
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
index 9e3a03417d917..d3e0b0fba8891 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.h
@@ -333,6 +333,7 @@ class TensorrtExecutionProvider : public IExecutionProvider {
   // The format is as for TENSORRT_VERSION: (MAJOR * 100 + MINOR) * 100 + PATCH
   int32_t trt_version_;
+  int32_t cuda_version_;
 
   // The OrtAllocator object will be obtained during EP compute time
   // and should be kept for the lifetime of the TRT EP object.
diff --git a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
index 95abcd1bad2b8..5a7b135fd92cd 100644
--- a/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
+++ b/onnxruntime/core/providers/tensorrt/tensorrt_execution_provider_utils.h
@@ -520,7 +520,7 @@ void RemoveCachesByType(const std::string& root, std::string file_extension) {
  * compiled kernels, so the name must be unique and deterministic across models and sessions.
* */ -HashValue TRTGenerateId(const GraphViewer& graph_viewer) { +HashValue TRTGenerateId(const GraphViewer& graph_viewer, std::string trt_version, std::string cuda_version) { HashValue model_hash = 0; // find the top level graph @@ -583,12 +583,11 @@ HashValue TRTGenerateId(const GraphViewer& graph_viewer) { #endif #ifdef CUDA_VERSION - hash_str(std::to_string(CUDA_VERSION)); + hash_str(cuda_version); #endif #if defined(NV_TENSORRT_MAJOR) && defined(NV_TENSORRT_MINOR) - std::string TRT_VERSION = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR); - hash_str(TRT_VERSION); + hash_str(trt_version); #endif model_hash = hash[0] | (uint64_t(hash[1]) << 32); diff --git a/onnxruntime/core/providers/webgpu/tensor/flatten.cc b/onnxruntime/core/providers/webgpu/tensor/flatten.cc new file mode 100644 index 0000000000000..81d28bd3c0fa7 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/flatten.cc @@ -0,0 +1,52 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. + +#include "core/providers/webgpu/tensor/flatten.h" +#include "core/providers/webgpu/webgpu_execution_provider.h" +#include "core/providers/webgpu/webgpu_supported_types.h" + +namespace onnxruntime { +namespace webgpu { + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 1, 8, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 9, 10, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 11, 12, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_VERSIONED_KERNEL_EX( + Flatten, + kOnnxDomain, + 13, 20, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +ONNX_OPERATOR_KERNEL_EX( + Flatten, + kOnnxDomain, + 21, + kWebGpuExecutionProvider, + (*KernelDefBuilder::Create()).TypeConstraint("T", WebGpuSupportedFloatTypes()).InputMemoryType(OrtMemTypeCPU, 1), + Flatten); + +} // namespace webgpu +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/webgpu/tensor/flatten.h b/onnxruntime/core/providers/webgpu/tensor/flatten.h new file mode 100644 index 0000000000000..5fc49a844b404 --- /dev/null +++ b/onnxruntime/core/providers/webgpu/tensor/flatten.h @@ -0,0 +1,62 @@ +// Copyright (c) Microsoft Corporation. All rights reserved. +// Licensed under the MIT License. 
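// [Editor's note] Flatten reshapes an input of shape [d_0, ..., d_(n-1)] into the
// 2-D shape [d_0 * ... * d_(axis-1), d_axis * ... * d_(n-1)], after normalizing a
// negative axis by adding the input rank. For example (illustrative values only),
// a [2, 3, 4] input flattened at axis = 1 becomes [2, 12], and at axis = 0 it
// becomes [1, 24]. The kernel below implements this as a pure reshape and only
// copies data when the output buffer differs from the input buffer.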
+ +#pragma once + +#include "core/framework/op_kernel.h" +#include "core/providers/cpu/nn/flatten.h" +#include "core/framework/data_transfer_manager.h" + +namespace onnxruntime { +namespace webgpu { + +class Flatten final : public OpKernel { + public: + explicit Flatten(const OpKernelInfo& info) : OpKernel{info} { + axis_ = info.GetAttrOrDefault("axis", 1); + } + + Status Compute(OpKernelContext* context) const override { + const Tensor* input_tensor = context->Input(0); + const TensorShape& input_shape = input_tensor->Shape(); + int64_t input_rank = input_shape.NumDimensions(); + + // Handle negative axis + int64_t axis = axis_; + if (axis < 0) { + axis += input_rank; + } + + if (axis > input_rank) { + return Status(common::ONNXRUNTIME, common::FAIL, "Invalid value for axis, must be less than or equal to input_rank"); + } + + int64_t first_dim = 1; + for (int64_t i = 0; i < axis; i++) { + first_dim *= input_shape[i]; + } + + int64_t second_dim = 1; + for (int64_t i = axis; i < input_rank; i++) { + second_dim *= input_shape[i]; + } + + TensorShape output_shape({first_dim, second_dim}); + Tensor* output_tensor = context->Output(0, output_shape); + + const void* source = input_tensor->DataRaw(); + void* target = output_tensor->MutableDataRaw(); + // If source and target pointers are not equal (non-inplace operation), we need to copy the data. + if (target != source) { + ORT_RETURN_IF_ERROR(Info().GetDataTransferManager().CopyTensor(*input_tensor, *output_tensor)); + } + + return Status::OK(); + } + + private: + int64_t axis_; +}; + +} // namespace webgpu +} // namespace onnxruntime \ No newline at end of file diff --git a/onnxruntime/core/providers/webgpu/webgpu_context.cc b/onnxruntime/core/providers/webgpu/webgpu_context.cc index 36aab2e628a16..ea0cbddb0205d 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_context.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_context.cc @@ -58,16 +58,15 @@ void WebGpuContext::Initialize(const WebGpuExecutionProviderInfo& webgpu_ep_info adapter_toggles_desc.enabledToggleCount = enabled_adapter_toggles.size(); adapter_toggles_desc.enabledToggles = enabled_adapter_toggles.data(); - wgpu::RequestAdapterCallbackInfo req_adapter_callback_info = {}; - req_adapter_callback_info.mode = wgpu::CallbackMode::WaitAnyOnly; - req_adapter_callback_info.callback = [](WGPURequestAdapterStatus status, - WGPUAdapter adapter, const char* message, - void* userdata) { - ORT_ENFORCE(status == WGPURequestAdapterStatus_Success, "Failed to get a WebGPU adapter: ", message); - *static_cast(userdata) = wgpu::Adapter::Acquire(adapter); - }; - req_adapter_callback_info.userdata = &adapter_; - ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(instance_.RequestAdapter(&req_adapter_options, req_adapter_callback_info), UINT64_MAX)); + ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(instance_.RequestAdapter( + &req_adapter_options, + wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::RequestAdapterStatus status, wgpu::Adapter adapter, wgpu::StringView message, wgpu::Adapter* ptr) { + ORT_ENFORCE(status == wgpu::RequestAdapterStatus::Success, "Failed to get a WebGPU adapter: ", std::string_view{message}); + *ptr = adapter; + }, + &adapter_), + UINT64_MAX)); ORT_ENFORCE(adapter_ != nullptr, "Failed to get a WebGPU adapter."); } @@ -103,14 +102,15 @@ void WebGpuContext::Initialize(const WebGpuExecutionProviderInfo& webgpu_ep_info std::cerr << "WebGPU device lost (" << int(reason) << "): " << message; }); - wgpu::RequestDeviceCallbackInfo req_device_callback_info = {}; - 
req_device_callback_info.mode = wgpu::CallbackMode::WaitAnyOnly; - req_device_callback_info.callback = [](WGPURequestDeviceStatus status, WGPUDevice device, char const* message, void* userdata) { - ORT_ENFORCE(status == WGPURequestDeviceStatus_Success, "Failed to get a WebGPU device: ", message); - *static_cast(userdata) = wgpu::Device::Acquire(device); - }; - req_device_callback_info.userdata = &device_; - ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(adapter_.RequestDevice(&device_desc, req_device_callback_info), UINT64_MAX)); + ORT_ENFORCE(wgpu::WaitStatus::Success == instance_.WaitAny(adapter_.RequestDevice( + &device_desc, + wgpu::CallbackMode::WaitAnyOnly, + [](wgpu::RequestDeviceStatus status, wgpu::Device device, wgpu::StringView message, wgpu::Device* ptr) { + ORT_ENFORCE(status == wgpu::RequestDeviceStatus::Success, "Failed to get a WebGPU device: ", std::string_view{message}); + *ptr = device; + }, + &device_), + UINT64_MAX)); ORT_ENFORCE(device_ != nullptr, "Failed to get a WebGPU device."); } diff --git a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc index f20c68ac0bfaf..66209adf6f1a9 100644 --- a/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc +++ b/onnxruntime/core/providers/webgpu/webgpu_execution_provider.cc @@ -347,7 +347,8 @@ class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 1, 8, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 9, 10, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 11, 12, Flatten); -class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Flatten); +class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, 20, Flatten); +class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 21, Flatten); class ONNX_OPERATOR_VERSIONED_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 6, 12, Tile); class ONNX_OPERATOR_KERNEL_CLASS_NAME(kWebGpuExecutionProvider, kOnnxDomain, 13, Tile); @@ -667,10 +668,12 @@ std::unique_ptr RegisterKernels() { // BuildKernelCreateInfo, // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, - // BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, + BuildKernelCreateInfo, BuildKernelCreateInfo, diff --git a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc index 329db75316e82..52fcc39ae5418 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/conv_op_builder.cc @@ -311,12 +311,12 @@ Status ConvOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N if (input_defs.size() >= 3) { x_zero_point = model_builder.GetOperand(node.InputDefs()[2]->Name()); } else { - x_zero_point = model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8); + x_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0); } if (input_defs.size() >= 4) { w_zero_point = model_builder.GetOperand(node.InputDefs()[3]->Name()); } else { - w_zero_point = 
model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
+      w_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0);
     }
     output = model_builder.GetBuilder().call("conv2dInteger",
                                              input, x_zero_point, filter, w_zero_point, options);
diff --git a/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc
index 5434194a214ac..9bb930c63b009 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/dropout_op_builder.cc
@@ -59,22 +59,14 @@ Status DropoutOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder,
     std::vector mask_shape;
     ORT_RETURN_IF_NOT(GetShape(*output_defs[1], mask_shape, logger), "Cannot get mask output's shape");
     std::vector dims = GetVecUint32FromVecInt64(mask_shape);
-
-    emscripten::val desc = emscripten::val::object();
-    desc.set("dataType", "uint8");
-    desc.set("dimensions", emscripten::val::array(dims));
-    desc.set("shape", emscripten::val::array(dims));
-    const auto num_elements = narrow(Product(mask_shape));
-    emscripten::val ones_buffer = emscripten::val::global("Uint8Array").new_(num_elements);
-    ones_buffer.call("fill", 1);
-
-    emscripten::val mask_output = model_builder.GetBuilder().call("constant", desc, ones_buffer);
+    emscripten::val one_constant = model_builder.CreateOrGetConstant(
+        ONNX_NAMESPACE::TensorProto_DataType_BOOL, 1, dims);
 
     emscripten::val options = emscripten::val::object();
     options.set("label", output_defs[1]->Name() + "_identity");
     // Add additional identity op in case the mask is the output of a WebNN graph,
     // because WebNN does not support a constant operand as output.
-    mask_output = model_builder.GetBuilder().call("identity", mask_output, options);
+    emscripten::val mask_output = model_builder.GetBuilder().call("identity", one_constant, options);
     model_builder.AddOperand(output_defs[1]->Name(), std::move(mask_output));
   }
   return Status::OK();
 }
diff --git a/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
index 1477530ce1894..252d49a2f4d4d 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/gemm_op_builder.cc
@@ -113,12 +113,12 @@ Status GemmOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, const N
     if (input_defs.size() >= 3) {
       a_zero_point = model_builder.GetOperand(node.InputDefs()[2]->Name());
     } else {
-      a_zero_point = model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
+      a_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0);
     }
     if (input_defs.size() >= 4) {
       b_zero_point = model_builder.GetOperand(node.InputDefs()[3]->Name());
     } else {
-      b_zero_point = model_builder.GetZeroConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8);
+      b_zero_point = model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0);
     }
     output = model_builder.GetBuilder().call("matmulInteger",
                                              a,
diff --git a/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc
index bdd1283c720f3..19f6d6aff8f97 100644
--- a/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc
+++ b/onnxruntime/core/providers/webnn/builders/impl/lrn_op_builder.cc
@@ -29,7 +29,8 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder&
model_builder, const Node& node, const logging::Logger& logger) const { const auto& input_defs = node.InputDefs(); - const auto input_data_type = input_defs[0]->TypeAsProto()->tensor_type().elem_type(); + int32_t input_data_type; + ORT_RETURN_IF_NOT(GetType(*input_defs[0], input_data_type, logger), "Cannot get input type"); emscripten::val input = model_builder.GetOperand(input_defs[0]->Name()); const auto node_name = node.Name(); emscripten::val wnn_builder = model_builder.GetBuilder(); @@ -42,10 +43,10 @@ Status LRNOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, // Prepare WebNN constants for alpha, beta, bias attributes. // Assume T is float, because input_data_type has been limited to float32 and float16 in 'hasSupportedInitsImpl'. - emscripten::val alpha_constant = model_builder.CreateOrGetScalarConstant(input_data_type, alpha); - emscripten::val beta_constant = model_builder.CreateOrGetScalarConstant(input_data_type, beta); - emscripten::val bias_constant = model_builder.CreateOrGetScalarConstant(input_data_type, bias); - emscripten::val pow1_constant = model_builder.CreateOrGetScalarConstant(input_data_type, 2); + emscripten::val alpha_constant = model_builder.CreateOrGetConstant(input_data_type, alpha); + emscripten::val beta_constant = model_builder.CreateOrGetConstant(input_data_type, beta); + emscripten::val bias_constant = model_builder.CreateOrGetConstant(input_data_type, bias); + emscripten::val pow1_constant = model_builder.CreateOrGetConstant(input_data_type, 2); /** WebNN doesn't support LRN. So decompose it into a series of ops: diff --git a/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc index fa82c2f85f0d8..79ed0393e3044 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/normalization_op_builder.cc @@ -100,7 +100,7 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder X --> Pow --> ReduceMean --> Add --> Sqrt --> Div -> Mul ^ ^ ^ ^ ^ | | | | | - Y:2 axis B:epsilon A:X A:scale + Y:2 axis B:epsilon A:X A:scale */ int32_t input_type; @@ -108,13 +108,7 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder emscripten::val common_options = emscripten::val::object(); // Pow - emscripten::val pow_constant_desc = emscripten::val::object(); - ORT_RETURN_IF_NOT(SetWebnnDataType(pow_constant_desc, input_type), "Unsupported data type"); - pow_constant_desc.set("shape", emscripten::val::array()); - emscripten::val pow_buffer = emscripten::val::global("Float32Array").new_(1); - pow_buffer.set(0, 2); - emscripten::val pow_constant = - model_builder.GetBuilder().call("constant", pow_constant_desc, pow_buffer); + emscripten::val pow_constant = model_builder.CreateOrGetConstant(input_type, 2); common_options.set("label", node.Name() + "_pow"); emscripten::val pow = model_builder.GetBuilder().call("pow", input, pow_constant, common_options); @@ -127,13 +121,7 @@ Status NormalizationOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder emscripten::val reduce_mean = model_builder.GetBuilder().call("reduceMean", pow, reduce_options); // Add - emscripten::val add_constant_desc = emscripten::val::object(); - ORT_RETURN_IF_NOT(SetWebnnDataType(add_constant_desc, input_type), "Unsupported data type"); - add_constant_desc.set("shape", emscripten::val::array()); - emscripten::val add_buffer = 
emscripten::val::global("Float32Array").new_(1); - add_buffer.set(0, epsilon); - emscripten::val add_constant = - model_builder.GetBuilder().call("constant", add_constant_desc, add_buffer); + emscripten::val add_constant = model_builder.CreateOrGetConstant(input_type, epsilon); common_options.set("label", node.Name() + "_add"); emscripten::val add = model_builder.GetBuilder().call("add", reduce_mean, add_constant, common_options); diff --git a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc index 88fb79b146cd9..ca15e123d0999 100644 --- a/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/impl/qdq_op_builder.cc @@ -100,7 +100,10 @@ Status QDQOpBuilder::AddToModelBuilderImpl(ModelBuilder& model_builder, // zero_point has the same shape as the scale tensor. zero_point_shape = GetVecUint32FromVecInt64(scale_shape); } - zero_point = model_builder.GetZeroConstant(zero_point_type, zero_point_shape); + // Create a zero constant with the same shape as the scale tensor. + // The zero value has been pre-processed in the CreateOrGetConstant function, + // so the type of T is not relevant here. + zero_point = model_builder.CreateOrGetConstant(zero_point_type, 0, zero_point_shape); } emscripten::val options = emscripten::val::object(); diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.cc b/onnxruntime/core/providers/webnn/builders/model_builder.cc index 8a82fce42189d..e8f116d390199 100644 --- a/onnxruntime/core/providers/webnn/builders/model_builder.cc +++ b/onnxruntime/core/providers/webnn/builders/model_builder.cc @@ -14,7 +14,6 @@ #include "core/providers/common.h" #include "core/providers/shared/utils/utils.h" -#include #include namespace onnxruntime { @@ -385,73 +384,6 @@ void ModelBuilder::AddOperand(const std::string& name, const emscripten::val& op wnn_operands_.insert(std::make_pair(name, operand)); } -// Get the zero constant with shape. -const emscripten::val& ModelBuilder::GetZeroConstant(const int32_t& data_type, - const std::vector& shape) { - std::string name = "webnn_zero_constant_" + std::to_string(data_type); - emscripten::val dims = emscripten::val::array(); - if (!shape.empty()) { - dims = emscripten::val::array(shape); - std::ostringstream name_stream; - name_stream << name; - for (const auto& dim : shape) { - name_stream << "_" << dim; - } - name = name_stream.str(); - } - // If the operand does not exist, create it. - if (wnn_operands_.find(name) == wnn_operands_.end()) { - emscripten::val desc = emscripten::val::object(); - desc.set("dimensions", dims); - desc.set("shape", dims); - emscripten::val zero_buffer = emscripten::val::undefined(); - if (!SetWebnnDataType(desc, data_type)) { - ORT_THROW("Unsupported data type: " + std::to_string(data_type)); - } - auto num_elements = Product(shape); - switch (data_type) { - case ONNX_NAMESPACE::TensorProto_DataType_INT4: - case ONNX_NAMESPACE::TensorProto_DataType_UINT4: - // For WebNN int4 and uint4 tensors are stored in Uint8Array, - // so we need to adjust the number of elements. 
-        num_elements = (num_elements + 1) / 2;
-        zero_buffer = emscripten::val::global("Uint8Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_BOOL:
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT8:
-        zero_buffer = emscripten::val::global("Uint8Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT8:
-        zero_buffer = emscripten::val::global("Int8Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16:
-        zero_buffer = emscripten::val::global("Uint16Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_FLOAT:
-        zero_buffer = emscripten::val::global("Float32Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT32:
-        zero_buffer = emscripten::val::global("Int32Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_INT64:
-        zero_buffer = emscripten::val::global("BigInt64Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT32:
-        zero_buffer = emscripten::val::global("Uint32Array").new_(num_elements);
-        break;
-      case ONNX_NAMESPACE::TensorProto_DataType_UINT64:
-        zero_buffer = emscripten::val::global("BigUint64Array").new_(num_elements);
-        break;
-      default:
-        break;
-    }
-
-    emscripten::val zero_constant = wnn_builder_.call("constant", desc, zero_buffer);
-    wnn_operands_.insert(std::make_pair(name, zero_constant));
-  }
-  return wnn_operands_.at(name);
-}
-
 void ModelBuilder::AddInitializerToSkip(const std::string& tensor_name) {
   skipped_initializers_.insert(tensor_name);
 }
diff --git a/onnxruntime/core/providers/webnn/builders/model_builder.h b/onnxruntime/core/providers/webnn/builders/model_builder.h
index c482e9d05b301..0fc2fa20670c7 100644
--- a/onnxruntime/core/providers/webnn/builders/model_builder.h
+++ b/onnxruntime/core/providers/webnn/builders/model_builder.h
@@ -11,6 +11,7 @@
 #include "core/framework/execution_provider.h"
 #include "core/providers/webnn/builders/helper.h"
 
+#include <sstream>
 #include
 #include
 
@@ -38,11 +39,10 @@ class ModelBuilder {
   const emscripten::val& GetOpSupportLimits() const { return wnn_limits_; }
 
   void AddOperand(const std::string& name, const emscripten::val& operand);
-  const emscripten::val& GetZeroConstant(
-      const int32_t& data_type, const std::vector& shape = {});
 
   template
-  const emscripten::val& CreateOrGetScalarConstant(const int32_t& data_type, T value);
+  const emscripten::val& CreateOrGetConstant(const int32_t& data_type, T value,
+                                             const std::vector& shape = {});
 
   // Use the buffers to persist WebNN allocated data like transposed weight.
   // It ensures their validity during the inference session.
@@ -103,11 +103,12 @@ class ModelBuilder {
   static const IOpBuilder* GetOpBuilder(const Node& node);
 };
 
-// Create a scalar constant MLOperand of the specified value and data type.
-// Workaround for the builder.constant(type, value) method since it has not been implemented yet.
+// Create or retrieve one of the following:
+// - A WebNN constant MLOperand filled with the specified value, data type, and shape.
+// - A WebNN scalar constant MLOperand with the specified value and data type.
+// For a scalar constant, this is a workaround for the builder.constant(type, value) method,
+// since it has not been implemented yet.
 // https://webmachinelearning.github.io/webnn/#api-mlgraphbuilder-constant-type-value
-// BTW, the spec is discussing if the builder.constant(type, value) should be dropped at
-// https://github.com/webmachinelearning/webnn/issues/475. Fix me according to the spec decision.
// // This function enforces a mapping between the data_type and the value types: // - TensorProto_DataType_INT4 <-> int8_t @@ -122,69 +123,96 @@ class ModelBuilder { // - TensorProto_DataType_UINT32 <-> uint32_t // - TensorProto_DataType_UINT64 <-> uint64_t template -const emscripten::val& ModelBuilder::CreateOrGetScalarConstant(const int32_t& data_type, T value) { - std::string name = "webnn_scalar_constant_" + std::to_string(data_type) + "_" + std::to_string(value); - emscripten::val desc = emscripten::val::object(); - desc.set("shape", emscripten::val::array()); - emscripten::val scalar_buffer = emscripten::val::undefined(); - uint16_t value_uint16 = 0; - uint8_t value_uint8 = 0; - if (!SetWebnnDataType(desc, data_type)) { - ORT_THROW("Unsupported data type: " + std::to_string(data_type)); +const emscripten::val& ModelBuilder::CreateOrGetConstant(const int32_t& data_type, T value, + const std::vector& shape) { + std::string name = "webnn_constant_" + std::to_string(data_type) + "_" + std::to_string(value); + emscripten::val dims = emscripten::val::array(); + if (!shape.empty()) { + dims = emscripten::val::array(shape); + std::ostringstream name_stream; + name_stream << name; + for (const auto& dim : shape) { + name_stream << "_" << dim; + } + name = name_stream.str(); } // If the operand does not exist, create it. if (wnn_operands_.find(name) == wnn_operands_.end()) { + emscripten::val desc = emscripten::val::object(); + desc.set("shape", dims); + desc.set("dimensions", dims); + emscripten::val buffer = emscripten::val::undefined(); + if (!SetWebnnDataType(desc, data_type)) { + ORT_THROW("Unsupported data type: " + std::to_string(data_type)); + } + auto num_elements = Product(shape); switch (data_type) { case ONNX_NAMESPACE::TensorProto_DataType_INT4: case ONNX_NAMESPACE::TensorProto_DataType_UINT4: - scalar_buffer = emscripten::val::global("Uint8Array").new_(1); - value_uint8 = PackInt8ToUint8AsNibble(value, data_type); - scalar_buffer.call("fill", emscripten::val(value_uint8)); + // For WebNN, int4 and uint4 tensors are stored in a Uint8Array, + // so we need to adjust the number of elements. + num_elements = (num_elements + 1) / 2; + buffer = emscripten::val::global("Uint8Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(PackInt8ToUint8AsNibble(value, data_type))); + } break; case ONNX_NAMESPACE::TensorProto_DataType_BOOL: - scalar_buffer = emscripten::val::global("Uint8Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value ? 
1 : 0)); - break; case ONNX_NAMESPACE::TensorProto_DataType_UINT8: - scalar_buffer = emscripten::val::global("Uint8Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Uint8Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_INT8: - scalar_buffer = emscripten::val::global("Int8Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Int8Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_FLOAT16: - scalar_buffer = emscripten::val::global("Uint16Array").new_(1); - value_uint16 = PackFloat32ToUint16AsFloat16(value); - scalar_buffer.call("fill", emscripten::val(value_uint16)); + buffer = emscripten::val::global("Uint16Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(PackFloat32ToUint16AsFloat16(value))); + } break; case ONNX_NAMESPACE::TensorProto_DataType_FLOAT: - scalar_buffer = emscripten::val::global("Float32Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Float32Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_INT32: - scalar_buffer = emscripten::val::global("Int32Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Int32Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_UINT32: - scalar_buffer = emscripten::val::global("Uint32Array").new_(1); - scalar_buffer.call("fill", emscripten::val(value)); + buffer = emscripten::val::global("Uint32Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_INT64: - scalar_buffer = emscripten::val::global("BigInt64Array").new_(1); - scalar_buffer.call("fill", emscripten::val::global("BigInt")(value)); + buffer = emscripten::val::global("BigInt64Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val::global("BigInt")(value)); + } break; case ONNX_NAMESPACE::TensorProto_DataType_UINT64: - scalar_buffer = emscripten::val::global("BigUint64Array").new_(1); - scalar_buffer.call("fill", emscripten::val::global("BigInt")(value)); + buffer = emscripten::val::global("BigUint64Array").new_(num_elements); + if (value) { + buffer.call("fill", emscripten::val::global("BigInt")(value)); + } break; default: break; } - const emscripten::val scalar_constant = wnn_builder_.call("constant", desc, scalar_buffer); - wnn_operands_.insert(std::make_pair(name, scalar_constant)); + const emscripten::val constant = wnn_builder_.call("constant", desc, buffer); + wnn_operands_.insert(std::make_pair(name, constant)); } return wnn_operands_.at(name); diff --git a/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp b/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp index a455007c2f6ae..b598c20e29280 100644 --- a/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp +++ b/onnxruntime/test/mlas/unittest/test_hqnbitgemm_neon.cpp @@ -81,7 +81,6 @@ class MlasNeonFp16CastTest : public MlasTestBase { class MlasNeonFp16PrepackTest : public MlasTestBase { private: - std::random_device rd_; // a seed source for the random number engine unsigned int seed_; 
std::mt19937 gen_; // mersenne_twister_engine seeded with rd() std::uniform_int_distribution<> distrib_; @@ -173,7 +172,7 @@ class MlasNeonFp16PrepackTest : public MlasTestBase { public: MlasNeonFp16PrepackTest() - : seed_(rd_()), gen_(seed_), distrib_(0, 255) { + : seed_(19287), gen_(seed_), distrib_(0, 255) { } static const char* GetTestSuiteName() { @@ -197,7 +196,6 @@ class MlasNeonFp16PrepackTest : public MlasTestBase { class MlasNeonFp16DequantBTest : public MlasTestBase { private: - std::random_device rd_; // a seed source for the random number engine unsigned int seed_; std::mt19937 gen_; // mersenne_twister_engine seeded with rd() std::uniform_int_distribution<> distrib_; @@ -318,7 +316,7 @@ class MlasNeonFp16DequantBTest : public MlasTestBase { public: MlasNeonFp16DequantBTest() - : seed_(rd_()), gen_(seed_), distrib_(0, 255), _distribFp(0.5f, 2.0f) { + : seed_(19287), gen_(seed_), distrib_(0, 255), _distribFp(0.5f, 2.0f) { } static const char* GetTestSuiteName() { @@ -353,7 +351,6 @@ class MlasNeonFp16DequantBTest : public MlasTestBase { class MlasNeonFp16HQ4BitGemmKernelTest : public MlasTestBase { private: - std::random_device rd_; // a seed source for the random number engine unsigned int seed_; std::mt19937 gen_; // mersenne_twister_engine seeded with rd() MatrixGuardBuffer A_, B_, C_, ref_, bias_; @@ -404,7 +401,7 @@ class MlasNeonFp16HQ4BitGemmKernelTest : public MlasTestBase { for (size_t m = 0; m < M; ++m) { for (size_t n = 0; n < N; ++n) { size_t i = m * Ldc + n; - ASSERT_TRUE(FloatEqual(target[i], ref[i], 0.015f, 0.03f)) + ASSERT_TRUE(FloatEqual(target[i], ref[i], 0.02f, 0.055f)) << " seed " << seed_ << " v0 " << target[i] << " v1 " << ref[i] << " m " << m << " n " << n; @@ -439,7 +436,7 @@ class MlasNeonFp16HQ4BitGemmKernelTest : public MlasTestBase { public: MlasNeonFp16HQ4BitGemmKernelTest() - : seed_(rd_()), gen_(seed_) { + : seed_(19287), gen_(seed_) { } static const char* GetTestSuiteName() { diff --git a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc index 63327a028c6f4..0022d7fc0e184 100644 --- a/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc +++ b/onnxruntime/test/providers/tensorrt/tensorrt_basic_test.cc @@ -342,8 +342,12 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { Graph& graph = model->MainGraph(); GraphViewer viewer(graph); + std::string trt_version = std::to_string(NV_TENSORRT_MAJOR) + "." + std::to_string(NV_TENSORRT_MINOR); + std::string cuda_version = std::to_string(CUDA_VERSION); + std::string ort_version = ORT_VERSION; + // get the hash for the model when loaded from file - HashValue model_hash = TRTGenerateId(viewer); + HashValue model_hash = TRTGenerateId(viewer, trt_version, cuda_version); ASSERT_NE(model_hash, 0); // now load the model from bytes and check the hash differs @@ -358,7 +362,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { // Test loading same model from file and byte steam. 
Hash values should be different Graph& graph2 = model2->MainGraph(); GraphViewer viewer2(graph2); - HashValue model_hash2 = TRTGenerateId(viewer2); + HashValue model_hash2 = TRTGenerateId(viewer2, trt_version, cuda_version); ASSERT_NE(model_hash, model_hash2); // Test loading same model from different path, see if hash values are same as well @@ -367,7 +371,7 @@ TEST(TensorrtExecutionProviderTest, TRTModelIdGeneratorUsingModelHashing) { ASSERT_TRUE(Model::Load(model_path, model3, nullptr, DefaultLoggingManager().DefaultLogger()).IsOK()); Graph& graph3 = model3->MainGraph(); GraphViewer viewer3(graph3); - HashValue model_hash3 = TRTGenerateId(viewer3); + HashValue model_hash3 = TRTGenerateId(viewer3, trt_version, cuda_version); ASSERT_EQ(model_hash, model_hash3) << "model 1&3 are same models and they have same hash, no matter where they are loaded"; } diff --git a/onnxruntime/test/python/onnx_backend_test_series.py b/onnxruntime/test/python/onnx_backend_test_series.py index 8fc76da3495a8..a274b90dc042f 100644 --- a/onnxruntime/test/python/onnx_backend_test_series.py +++ b/onnxruntime/test/python/onnx_backend_test_series.py @@ -105,7 +105,7 @@ def load_jsonc(basename: str): return json.loads("\n".join(lines)) -def create_backend_test(test_name=None): +def create_backend_test(devices: list[str], test_name=None): """Creates an OrtBackendTest and adds its TestCase's to global scope so unittest will find them.""" overrides = load_jsonc("onnx_backend_test_series_overrides.jsonc") @@ -126,30 +126,29 @@ def create_backend_test(test_name=None): else: filters = load_jsonc("onnx_backend_test_series_filters.jsonc") current_failing_tests = apply_filters(filters, "current_failing_tests") - if platform.architecture()[0] == "32bit": current_failing_tests += apply_filters(filters, "current_failing_tests_x86") - if backend.supports_device("DNNL"): + if backend.supports_device("DNNL") or "DNNL" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_DNNL") - if backend.supports_device("NNAPI"): + if backend.supports_device("NNAPI") or "NNAPI" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_NNAPI") - if backend.supports_device("OPENVINO_GPU"): + if backend.supports_device("OPENVINO_GPU") or "OPENVINO_GPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_GPU") - if backend.supports_device("OPENVINO_CPU"): + if backend.supports_device("OPENVINO_CPU") or "OPENVINO_CPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP32") current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_CPU_FP16") - if backend.supports_device("OPENVINO_NPU"): + if backend.supports_device("OPENVINO_NPU") or "OPENVINO_NPU" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_NPU") - if backend.supports_device("OPENVINO"): + if backend.supports_device("OPENVINO") or "OPENVINO" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_OPENVINO_opset18") - if backend.supports_device("MIGRAPHX"): + if backend.supports_device("MIGRAPHX") or "MIGRAPHX" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_MIGRAPHX") if backend.supports_device("WEBGPU"): @@ -158,8 +157,16 @@ def create_backend_test(test_name=None): # Skip these tests for a "pure" DML onnxruntime python wheel. 
We keep these tests enabled for instances where both DML and CUDA # EPs are available (Windows GPU CI pipeline has this config) - these test will pass because CUDA has higher precedence than DML # and the nodes are assigned to only the CUDA EP (which supports these tests) - if backend.supports_device("DML") and not backend.supports_device("GPU"): + if (backend.supports_device("DML") and not backend.supports_device("GPU")) or "DML" in devices: current_failing_tests += apply_filters(filters, "current_failing_tests_pure_DML") + # exclude CUDA EP when DML test is running. + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,CUDAExecutionProvider" + elif backend.supports_device("DML") and "DML" not in devices: + # exclude DML EP when CUDA test is running. + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider,DmlExecutionProvider" + else: + # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior + os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" filters = ( current_failing_tests @@ -172,9 +179,6 @@ def create_backend_test(test_name=None): backend_test.exclude("(" + "|".join(filters) + ")") print("excluded tests:", filters) - # exclude TRT EP temporarily and only test CUDA EP to retain previous behavior - os.environ["ORT_ONNX_BACKEND_EXCLUDE_PROVIDERS"] = "TensorrtExecutionProvider" - # import all test cases at global scope to make # them visible to python.unittest. globals().update(backend_test.enable_report().test_cases) @@ -199,6 +203,15 @@ def parse_args(): help="Only run tests that match this value. Matching is regex based, and '.*' is automatically appended", ) + parser.add_argument( + "--devices", + type=str, + choices=["CPU", "CUDA", "MIGRAPHX", "DNNL", "DML", "OPENVINO_GPU", "OPENVINO_CPU", "OPENVINO_NPU", "OPENVINO"], + nargs="+", # allows multiple values + default=["CPU"], # default to ["CPU"] if no input is given + help="Select one or more devices CPU, CUDA, MIGRAPHX, DNNL, DML, OPENVINO_GPU, OPENVINO_CPU, OPENVINO_NPU, OPENVINO", + ) + # parse just our args. 
python unittest has its own args and arg parsing, and that runs inside unittest.main() parsed, unknown = parser.parse_known_args() sys.argv = sys.argv[:1] + unknown @@ -209,5 +222,5 @@ if __name__ == "__main__": args = parse_args() - create_backend_test(args.test_name) + create_backend_test(args.devices, args.test_name) unittest.main() diff --git a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc index f083ab14ad133..7ecaab6fedb02 100644 --- a/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc +++ b/onnxruntime/test/testdata/onnx_backend_test_series_filters.jsonc @@ -750,6 +750,13 @@ "^test_reduce_log_sum_empty_set_cpu", "^test_reduce_log_sum_exp_empty_set_cpu", "^test_reduce_prod_empty_set_cpu", + // Bug: DML EP somehow executes these CUDA tests and fails + // TODO: Remove these tests when DML EP is fixed + "^test_convtranspose_autopad_same_cuda", + "^test_asin_example_cuda", + "^test_dynamicquantizelinear_cuda", + "^test_dynamicquantizelinear_expanded_cuda", + "^test_reduce_min_empty_set_cuda", //Bug: DML EP does not execute operators with an empty input tensor //TODO: Resolve as a graph implementation that returns a constant inf tensor with appropriate strides "^test_reduce_min_empty_set_cpu" diff --git a/onnxruntime/test/wasm/package-lock.json b/onnxruntime/test/wasm/package-lock.json index 522e96fc3188a..3bd5d173dbe79 100644 --- a/onnxruntime/test/wasm/package-lock.json +++ b/onnxruntime/test/wasm/package-lock.json @@ -27,9 +27,9 @@ } }, "node_modules/@socket.io/component-emitter": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz", - "integrity": "sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz", + "integrity": "sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==", "dev": true }, "node_modules/@types/cookie": { @@ -39,19 +39,22 @@ "dev": true }, "node_modules/@types/cors": { - "version": "2.8.13", - "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.13.tgz", - "integrity": "sha512-RG8AStHlUiV5ysZQKq97copd2UmVYw3/pRMLefISZ3S1hK104Cwm7iLQ3fTKx+lsUH2CE8FlLaYeEA2LSeqYUA==", + "version": "2.8.17", + "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz", + "integrity": "sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==", "dev": true, "dependencies": { "@types/node": "*" } }, "node_modules/@types/node": { - "version": "18.13.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.13.0.tgz", - "integrity": "sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg==", - "dev": true + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "dev": true, + "dependencies": { + "undici-types": "~6.20.0" + } }, "node_modules/accepts": { "version": "1.3.8", @@ -162,12 +165,12 @@ } }, "node_modules/braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": 
"3.0.3", + "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "dependencies": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" }, "engines": { "node": ">=8" @@ -288,9 +291,9 @@ } }, "node_modules/cookie": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.2.tgz", - "integrity": "sha512-aSWTXFzaKWkvHO1Ny/s+ePFpvKsPnjc551iI41v3ny/ow6tBG5Vd+FuqGNhh1LxOmVzOlGUriIlOaokOvhaStA==", + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", "dev": true, "engines": { "node": ">= 0.6" @@ -409,9 +412,9 @@ } }, "node_modules/engine.io": { - "version": "6.4.2", - "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.4.2.tgz", - "integrity": "sha512-FKn/3oMiJjrOEOeUub2WCox6JhxBXq/Zn3fZOMCBxKnNYtsdKjxhl7yR3fZhM9PV+rdE75SU5SYMc+2PGzo+Tg==", + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.6.2.tgz", + "integrity": "sha512-gmNvsYi9C8iErnZdVcJnvCpSKbWTt1E8+JZo8b+daLninywUWi5NQ5STSHZ9rFjFO7imNcvb8Pc5pe/wMR5xEw==", "dev": true, "dependencies": { "@types/cookie": "^0.4.1", @@ -419,32 +422,32 @@ "@types/node": ">=10.0.0", "accepts": "~1.3.4", "base64id": "2.0.0", - "cookie": "~0.4.1", + "cookie": "~0.7.2", "cors": "~2.8.5", "debug": "~4.3.1", - "engine.io-parser": "~5.0.3", - "ws": "~8.11.0" + "engine.io-parser": "~5.2.1", + "ws": "~8.17.1" }, "engines": { - "node": ">=10.0.0" + "node": ">=10.2.0" } }, "node_modules/engine.io-parser": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.0.6.tgz", - "integrity": "sha512-tjuoZDMAdEhVnSFleYPCtdL2GXwVTGtNjoeJd9IhIG3C1xs9uwxqRNEu5WpnDZCaozwVlK/nuQhpodhXSIMaxw==", + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.2.3.tgz", + "integrity": "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q==", "dev": true, "engines": { "node": ">=10.0.0" } }, "node_modules/engine.io/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -456,9 +459,9 @@ } }, "node_modules/engine.io/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, "node_modules/ent": { @@ -516,9 +519,9 @@ "dev": true }, "node_modules/fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + 
"version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "dependencies": { "to-regex-range": "^5.0.1" @@ -1304,35 +1307,60 @@ } }, "node_modules/socket.io": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.6.0.tgz", - "integrity": "sha512-b65bp6INPk/BMMrIgVvX12x3Q+NqlGqSlTuvKQWt0BUJ3Hyy3JangBl7fEoWZTXbOKlCqNPbQ6MbWgok/km28w==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.8.1.tgz", + "integrity": "sha512-oZ7iUCxph8WYRHHcjBEc9unw3adt5CmSNlppj/5Q4k2RIrhl8Z5yY2Xr4j9zj0+wzVZ0bxmYoGSzKJnRl6A4yg==", "dev": true, "dependencies": { "accepts": "~1.3.4", "base64id": "~2.0.0", + "cors": "~2.8.5", "debug": "~4.3.2", - "engine.io": "~6.4.0", + "engine.io": "~6.6.0", "socket.io-adapter": "~2.5.2", - "socket.io-parser": "~4.2.1" + "socket.io-parser": "~4.2.4" }, "engines": { - "node": ">=10.0.0" + "node": ">=10.2.0" } }, "node_modules/socket.io-adapter": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.2.tgz", - "integrity": "sha512-87C3LO/NOMc+eMcpcxUBebGjkpMDkNBS9tf7KJqcDsmL936EChtVva71Dw2q4tQcuVC+hAUy4an2NO/sYXmwRA==", + "version": "2.5.5", + "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.5.tgz", + "integrity": "sha512-eLDQas5dzPgOWCk9GuuJC2lBqItuhKI4uxGgo9aIV7MYbk2h9Q6uULEh8WBzThoI7l+qU9Ast9fVUmkqPP9wYg==", + "dev": true, + "dependencies": { + "debug": "~4.3.4", + "ws": "~8.17.1" + } + }, + "node_modules/socket.io-adapter/node_modules/debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ws": "~8.11.0" + "ms": "^2.1.3" + }, + "engines": { + "node": ">=6.0" + }, + "peerDependenciesMeta": { + "supports-color": { + "optional": true + } } }, + "node_modules/socket.io-adapter/node_modules/ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + }, "node_modules/socket.io-parser": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.3.tgz", - "integrity": "sha512-JMafRntWVO2DCJimKsRTh/wnqVvO4hrfwOqtO7f+uzwsQMuxO6VwImtYxaQ+ieoyshWOTJyV0fA21lccEXRPpQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.4.tgz", + "integrity": "sha512-/GbIKmo8ioc+NIWIhwdecY0ge+qVBSMdgxGygevmdHj24bsfgtCmcUUcQ5ZzcylGFHsN3k4HB4Cgkl96KVnuew==", "dev": true, "dependencies": { "@socket.io/component-emitter": "~3.1.0", @@ -1343,12 +1371,12 @@ } }, "node_modules/socket.io-parser/node_modules/debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "dependencies": { - "ms": "2.1.2" + "ms": "^2.1.3" }, "engines": { "node": ">=6.0" @@ -1360,9 +1388,9 @@ } }, 
"node_modules/socket.io-parser/node_modules/ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true }, "node_modules/socket.io/node_modules/debug": { @@ -1534,6 +1562,12 @@ "node": "*" } }, + "node_modules/undici-types": { + "version": "6.20.0", + "resolved": "https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true + }, "node_modules/universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", @@ -1615,16 +1649,16 @@ "dev": true }, "node_modules/ws": { - "version": "8.11.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.11.0.tgz", - "integrity": "sha512-HPG3wQd9sNQoT9xHyNCXoDUa+Xw/VevmY9FoHyQ+g+rrMn4j6FB4np7Z0OhdTgjx6MgQLK7jwSy1YecU1+4Asg==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", "dev": true, "engines": { "node": ">=10.0.0" }, "peerDependencies": { "bufferutil": "^4.0.1", - "utf-8-validate": "^5.0.2" + "utf-8-validate": ">=5.0.2" }, "peerDependenciesMeta": { "bufferutil": { @@ -1686,9 +1720,9 @@ "dev": true }, "@socket.io/component-emitter": { - "version": "3.1.0", - "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.0.tgz", - "integrity": "sha512-+9jVqKhRSpsc591z5vX+X5Yyw+he/HCB4iQ/RYxw35CEPaY1gnsNE43nf9n9AaYjAQrTiI/mOwKUKdUs9vf7Xg==", + "version": "3.1.2", + "resolved": "https://registry.npmjs.org/@socket.io/component-emitter/-/component-emitter-3.1.2.tgz", + "integrity": "sha512-9BCxFwvbGg/RsZK9tjXd8s4UcwR0MWeFQ1XEKIQVVvAGJyINdrqKMcTRyLoK8Rse1GjzLV9cwjWV1olXRWEXVA==", "dev": true }, "@types/cookie": { @@ -1698,19 +1732,22 @@ "dev": true }, "@types/cors": { - "version": "2.8.13", - "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.13.tgz", - "integrity": "sha512-RG8AStHlUiV5ysZQKq97copd2UmVYw3/pRMLefISZ3S1hK104Cwm7iLQ3fTKx+lsUH2CE8FlLaYeEA2LSeqYUA==", + "version": "2.8.17", + "resolved": "https://registry.npmjs.org/@types/cors/-/cors-2.8.17.tgz", + "integrity": "sha512-8CGDvrBj1zgo2qE+oS3pOCyYNqCPryMWY2bGfwA0dcfopWGgxs+78df0Rs3rc9THP4JkOhLsAa+15VdpAqkcUA==", "dev": true, "requires": { "@types/node": "*" } }, "@types/node": { - "version": "18.13.0", - "resolved": "https://registry.npmjs.org/@types/node/-/node-18.13.0.tgz", - "integrity": "sha512-gC3TazRzGoOnoKAhUx+Q0t8S9Tzs74z7m0ipwGpSqQrleP14hKxP4/JUeEQcD3W1/aIpnWl8pHowI7WokuZpXg==", - "dev": true + "version": "22.10.1", + "resolved": "https://registry.npmjs.org/@types/node/-/node-22.10.1.tgz", + "integrity": "sha512-qKgsUwfHZV2WCWLAnVP1JqnpE6Im6h3Y0+fYgMTasNQ7V++CBX5OT1as0g0f+OyubbFqhf6XVNIsmN4IIhEgGQ==", + "dev": true, + "requires": { + "undici-types": "~6.20.0" + } }, "accepts": { "version": "1.3.8", @@ -1796,12 +1833,12 @@ } }, "braces": { - "version": "3.0.2", - "resolved": "https://registry.npmjs.org/braces/-/braces-3.0.2.tgz", - "integrity": "sha512-b8um+L1RzM3WDSzvhm6gIz1yfTbBt6YTlcEKAvsmqCZZFw46z626lVj9j1yEPW33H5H+lBQpZMP1k8l+78Ha0A==", + "version": "3.0.3", + 
"resolved": "https://registry.npmjs.org/braces/-/braces-3.0.3.tgz", + "integrity": "sha512-yQbXgO/OSZVD2IsiLlro+7Hf6Q18EJrKSEsdoMzKePKXct3gvD8oLcOQdIzGupr5Fj+EDe8gO/lxc1BzfMpxvA==", "dev": true, "requires": { - "fill-range": "^7.0.1" + "fill-range": "^7.1.1" } }, "bytes": { @@ -1890,9 +1927,9 @@ "dev": true }, "cookie": { - "version": "0.4.2", - "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.4.2.tgz", - "integrity": "sha512-aSWTXFzaKWkvHO1Ny/s+ePFpvKsPnjc551iI41v3ny/ow6tBG5Vd+FuqGNhh1LxOmVzOlGUriIlOaokOvhaStA==", + "version": "0.7.2", + "resolved": "https://registry.npmjs.org/cookie/-/cookie-0.7.2.tgz", + "integrity": "sha512-yki5XnKuf750l50uGTllt6kKILY4nQ1eNIQatoXEByZ5dWgnKqbnqmTrBE5B4N7lrMJKQ2ytWMiTO2o0v6Ew/w==", "dev": true }, "cors": { @@ -1986,9 +2023,9 @@ "dev": true }, "engine.io": { - "version": "6.4.2", - "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.4.2.tgz", - "integrity": "sha512-FKn/3oMiJjrOEOeUub2WCox6JhxBXq/Zn3fZOMCBxKnNYtsdKjxhl7yR3fZhM9PV+rdE75SU5SYMc+2PGzo+Tg==", + "version": "6.6.2", + "resolved": "https://registry.npmjs.org/engine.io/-/engine.io-6.6.2.tgz", + "integrity": "sha512-gmNvsYi9C8iErnZdVcJnvCpSKbWTt1E8+JZo8b+daLninywUWi5NQ5STSHZ9rFjFO7imNcvb8Pc5pe/wMR5xEw==", "dev": true, "requires": { "@types/cookie": "^0.4.1", @@ -1996,34 +2033,34 @@ "@types/node": ">=10.0.0", "accepts": "~1.3.4", "base64id": "2.0.0", - "cookie": "~0.4.1", + "cookie": "~0.7.2", "cors": "~2.8.5", "debug": "~4.3.1", - "engine.io-parser": "~5.0.3", - "ws": "~8.11.0" + "engine.io-parser": "~5.2.1", + "ws": "~8.17.1" }, "dependencies": { "debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "requires": { - "ms": "2.1.2" + "ms": "^2.1.3" } }, "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true } } }, "engine.io-parser": { - "version": "5.0.6", - "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.0.6.tgz", - "integrity": "sha512-tjuoZDMAdEhVnSFleYPCtdL2GXwVTGtNjoeJd9IhIG3C1xs9uwxqRNEu5WpnDZCaozwVlK/nuQhpodhXSIMaxw==", + "version": "5.2.3", + "resolved": "https://registry.npmjs.org/engine.io-parser/-/engine.io-parser-5.2.3.tgz", + "integrity": "sha512-HqD3yTBfnBxIrbnM1DoD6Pcq8NECnh8d4As1Qgh0z5Gg3jRRIqijury0CL3ghu/edArpUYiYqQiDUQBIs4np3Q==", "dev": true }, "ent": { @@ -2072,9 +2109,9 @@ "dev": true }, "fill-range": { - "version": "7.0.1", - "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.0.1.tgz", - "integrity": "sha512-qOo9F+dMUmC2Lcb4BbVvnKJxTPjCm+RRpe4gDuGrzkL7mEVl/djYSu2OdQ2Pa302N4oqkSg9ir6jaLWJ2USVpQ==", + "version": "7.1.1", + "resolved": "https://registry.npmjs.org/fill-range/-/fill-range-7.1.1.tgz", + "integrity": "sha512-YsGpe3WHLK8ZYi4tWDg2Jy3ebRz2rXowDxnld4bkQB00cc/1Zw9AWnC0i9ztDJitivtQvaI9KaLyKrc+hBW0yg==", "dev": true, "requires": { "to-regex-range": "^5.0.1" @@ -2651,17 +2688,18 @@ } }, 
"socket.io": { - "version": "4.6.0", - "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.6.0.tgz", - "integrity": "sha512-b65bp6INPk/BMMrIgVvX12x3Q+NqlGqSlTuvKQWt0BUJ3Hyy3JangBl7fEoWZTXbOKlCqNPbQ6MbWgok/km28w==", + "version": "4.8.1", + "resolved": "https://registry.npmjs.org/socket.io/-/socket.io-4.8.1.tgz", + "integrity": "sha512-oZ7iUCxph8WYRHHcjBEc9unw3adt5CmSNlppj/5Q4k2RIrhl8Z5yY2Xr4j9zj0+wzVZ0bxmYoGSzKJnRl6A4yg==", "dev": true, "requires": { "accepts": "~1.3.4", "base64id": "~2.0.0", + "cors": "~2.8.5", "debug": "~4.3.2", - "engine.io": "~6.4.0", + "engine.io": "~6.6.0", "socket.io-adapter": "~2.5.2", - "socket.io-parser": "~4.2.1" + "socket.io-parser": "~4.2.4" }, "dependencies": { "debug": { @@ -2682,18 +2720,36 @@ } }, "socket.io-adapter": { - "version": "2.5.2", - "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.2.tgz", - "integrity": "sha512-87C3LO/NOMc+eMcpcxUBebGjkpMDkNBS9tf7KJqcDsmL936EChtVva71Dw2q4tQcuVC+hAUy4an2NO/sYXmwRA==", + "version": "2.5.5", + "resolved": "https://registry.npmjs.org/socket.io-adapter/-/socket.io-adapter-2.5.5.tgz", + "integrity": "sha512-eLDQas5dzPgOWCk9GuuJC2lBqItuhKI4uxGgo9aIV7MYbk2h9Q6uULEh8WBzThoI7l+qU9Ast9fVUmkqPP9wYg==", "dev": true, "requires": { - "ws": "~8.11.0" + "debug": "~4.3.4", + "ws": "~8.17.1" + }, + "dependencies": { + "debug": { + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", + "dev": true, + "requires": { + "ms": "^2.1.3" + } + }, + "ms": { + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", + "dev": true + } } }, "socket.io-parser": { - "version": "4.2.3", - "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.3.tgz", - "integrity": "sha512-JMafRntWVO2DCJimKsRTh/wnqVvO4hrfwOqtO7f+uzwsQMuxO6VwImtYxaQ+ieoyshWOTJyV0fA21lccEXRPpQ==", + "version": "4.2.4", + "resolved": "https://registry.npmjs.org/socket.io-parser/-/socket.io-parser-4.2.4.tgz", + "integrity": "sha512-/GbIKmo8ioc+NIWIhwdecY0ge+qVBSMdgxGygevmdHj24bsfgtCmcUUcQ5ZzcylGFHsN3k4HB4Cgkl96KVnuew==", "dev": true, "requires": { "@socket.io/component-emitter": "~3.1.0", @@ -2701,18 +2757,18 @@ }, "dependencies": { "debug": { - "version": "4.3.4", - "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.4.tgz", - "integrity": "sha512-PRWFHuSU3eDtQJPvnNY7Jcket1j0t5OuOsFzPPzsekD52Zl8qUfFIPEiswXqIvHWGVHOgX+7G/vCNNhehwxfkQ==", + "version": "4.3.7", + "resolved": "https://registry.npmjs.org/debug/-/debug-4.3.7.tgz", + "integrity": "sha512-Er2nc/H7RrMXZBFCEim6TCmMk02Z8vLC2Rbi1KEBggpo0fS6l0S1nnapwmIi3yW/+GOJap1Krg4w0Hg80oCqgQ==", "dev": true, "requires": { - "ms": "2.1.2" + "ms": "^2.1.3" } }, "ms": { - "version": "2.1.2", - "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.2.tgz", - "integrity": "sha512-sGkPx+VjMtmA6MX27oA4FBFELFCZZ4S4XqeGOXCv68tT+jb3vk/RyaKWP0PTKyWtmLSM0b+adUTEvbs1PEaH2w==", + "version": "2.1.3", + "resolved": "https://registry.npmjs.org/ms/-/ms-2.1.3.tgz", + "integrity": "sha512-6FlzubTLZG3J2a/NVCAleEhjzq5oxgHyaCU9yYXvcLsvoVaHJq/s5xXI6/XXP6tz7R9xAOtHnSO/tXtF3WRTlA==", "dev": true } } @@ -2817,6 +2873,12 @@ "integrity": "sha512-s8ax/CeZdK9R/56Sui0WM6y9OFREJarMRHqLB2EwkovemBxNQ+Bqu8GAsUnVcXKgphb++ghr/B2BZx4mahujPw==", "dev": true }, + "undici-types": { + "version": "6.20.0", + "resolved": 
"https://registry.npmjs.org/undici-types/-/undici-types-6.20.0.tgz", + "integrity": "sha512-Ny6QZ2Nju20vw1SRHe3d9jVu6gJ+4e3+MMpqu7pqE5HT6WsTSlce++GQmK5UXS8mzV8DSYHrQH+Xrf2jVcuKNg==", + "dev": true + }, "universalify": { "version": "0.1.2", "resolved": "https://registry.npmjs.org/universalify/-/universalify-0.1.2.tgz", @@ -2874,9 +2936,9 @@ "dev": true }, "ws": { - "version": "8.11.0", - "resolved": "https://registry.npmjs.org/ws/-/ws-8.11.0.tgz", - "integrity": "sha512-HPG3wQd9sNQoT9xHyNCXoDUa+Xw/VevmY9FoHyQ+g+rrMn4j6FB4np7Z0OhdTgjx6MgQLK7jwSy1YecU1+4Asg==", + "version": "8.17.1", + "resolved": "https://registry.npmjs.org/ws/-/ws-8.17.1.tgz", + "integrity": "sha512-6XQFvXTkbfUOZOKKILFG1PDK2NDQs4azKQl26T0YS5CxqWLgXajbPZ+h4gZekJyRqFU8pvnbAbbs/3TgRPy+GQ==", "dev": true, "requires": {} }, diff --git a/tools/ci_build/github/android/build_aar_package.py b/tools/ci_build/github/android/build_aar_package.py index 19f66245a45e2..1b34b3d302e57 100644 --- a/tools/ci_build/github/android/build_aar_package.py +++ b/tools/ci_build/github/android/build_aar_package.py @@ -23,11 +23,11 @@ # Onnx Runtime native library is built against NDK API 21 by default # It is possible to build from source for Android API levels below 21, but it is not guaranteed -DEFAULT_ANDROID_MIN_SDK_VER = 21 +DEFAULT_ANDROID_MIN_SDK_VER = 24 # Android API 24 is the default target API version for Android builds, based on Microsoft 1CS requirements # It is possible to build from source using API level 21 and higher as the target SDK version -DEFAULT_ANDROID_TARGET_SDK_VER = 24 +DEFAULT_ANDROID_TARGET_SDK_VER = 34 def _parse_build_settings(args): diff --git a/tools/ci_build/github/android/default_full_aar_build_settings.json b/tools/ci_build/github/android/default_full_aar_build_settings.json index b0eff75812673..1c7769c623d41 100644 --- a/tools/ci_build/github/android/default_full_aar_build_settings.json +++ b/tools/ci_build/github/android/default_full_aar_build_settings.json @@ -5,8 +5,8 @@ "x86", "x86_64" ], - "android_min_sdk_version": 21, - "android_target_sdk_version": 24, + "android_min_sdk_version": 24, + "android_target_sdk_version": 34, "build_params": [ "--enable_lto", "--android", diff --git a/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml b/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml new file mode 100644 index 0000000000000..9a721c65de332 --- /dev/null +++ b/tools/ci_build/github/azure-pipelines/stages/jobs/steps/py_packaging_test_step.yml @@ -0,0 +1,21 @@ +parameters: +- name: EP_NAME + type: string + default: CPU + +- name: PYTHON_VERSION + type: string + +steps: +- powershell: | + python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq + Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*cp${{ replace(parameters.PYTHON_VERSION,'.','') }}*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} + mkdir -p $(Agent.TempDirectory)\ort_test_data + Copy-Item -Path $(Build.sourcesDirectory)/onnxruntime/test/python/onnx_backend_test_series.py -Destination $(Agent.TempDirectory)\ort_test_data + Copy-Item -Recurse -Path $(Build.sourcesDirectory)/onnxruntime/test/testdata -Destination $(Agent.TempDirectory)\ort_test_data + cd $(Agent.TempDirectory)\ort_test_data + python onnx_backend_test_series.py --devices ${{ parameters.EP_NAME }} -v + cd $(Agent.TempDirectory) + Remove-Item -Path $(Agent.TempDirectory)\ort_test_data -Recurse -Force + workingDirectory: '$(Build.sourcesDirectory)' + displayName: 'Run Python Tests 
with ${{ parameters.EP_NAME }} EP' \ No newline at end of file diff --git a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml index 947e4f99b984f..f7235e3ad2076 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-gpu-packaging-stage.yml @@ -56,7 +56,7 @@ stages: PYTHON_VERSION: ${{ python_version }} EP_NAME: gpu CudaVersion: ${{ parameters.cuda_version }} - EP_BUILD_FLAGS: --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" + EP_BUILD_FLAGS: --use_dml --enable_lto --cuda_home=$(Agent.TempDirectory)\v${{ parameters.cuda_version }} --cmake_extra_defines "CMAKE_CUDA_ARCHITECTURES=52;60;61;70;75;80" use_tensorrt: True - ${{ if eq(parameters.enable_linux_cuda, true) }}: diff --git a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml index aa7f2845fc0fa..dd0539f751c89 100644 --- a/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml +++ b/tools/ci_build/github/azure-pipelines/stages/py-win-gpu-stage.yml @@ -33,7 +33,7 @@ parameters: - Release - RelWithDebInfo - MinSizeRel - + - name: use_tensorrt type: boolean default: false @@ -134,7 +134,7 @@ stages: --cmake_generator "$(VSGenerator)" --enable_pybind --enable_onnx_tests - --parallel --use_binskim_compliant_compile_flags --update --build + --parallel 4 --use_binskim_compliant_compile_flags --update --build $(TelemetryOption) ${{ parameters.BUILD_PY_PARAMETERS }} ${{ parameters.EP_BUILD_FLAGS }} ${{ variables.trt_build_flag }} workingDirectory: '$(Build.BinariesDirectory)' @@ -206,19 +206,20 @@ stages: DownloadTRT: ${{ parameters.use_tensorrt }} - task: PowerShell@2 - displayName: 'Install ONNX' + displayName: 'Install Third Party Dependencies' inputs: filePath: '$(Build.SourcesDirectory)/tools/ci_build/github/windows/install_third_party_deps.ps1' workingDirectory: '$(Build.BinariesDirectory)' arguments: -cpu_arch x64 -install_prefix $(Build.BinariesDirectory)\${{ parameters.cmake_build_type }}\installed -build_config ${{ parameters.cmake_build_type }} - - powershell: | - python -m pip uninstall -y onnxruntime onnxruntime-gpu -qq - Get-ChildItem -Path $(Build.ArtifactStagingDirectory)/*cp${{ replace(parameters.PYTHON_VERSION,'.','') }}*.whl | foreach {pip --disable-pip-version-check install --upgrade $_.fullname tabulate} - mkdir -p $(Agent.TempDirectory)\ort_test_data - Copy-Item -Path $(Build.sourcesDirectory)/onnxruntime/test/python/onnx_backend_test_series.py -Destination $(Agent.TempDirectory)\ort_test_data - Copy-Item -Recurse -Path $(Build.sourcesDirectory)/onnxruntime/test/testdata -Destination $(Agent.TempDirectory)\ort_test_data - cd $(Agent.TempDirectory)\ort_test_data - python onnx_backend_test_series.py - workingDirectory: '$(Build.sourcesDirectory)' - displayName: 'Run Python Tests' + - template: jobs/steps/py_packaging_test_step.yml + parameters: + EP_NAME: DML + PYTHON_VERSION: ${{ parameters.PYTHON_VERSION }} + + - template: jobs/steps/py_packaging_test_step.yml + parameters: + EP_NAME: CUDA + PYTHON_VERSION: ${{ parameters.PYTHON_VERSION }} + + diff --git a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml index 4fa36a1ff548b..949479fb8b5e4 100644 --- 
a/tools/ci_build/github/azure-pipelines/templates/download-deps.yml +++ b/tools/ci_build/github/azure-pipelines/templates/download-deps.yml @@ -11,7 +11,7 @@ steps: packageType: upack feed: '/7424c8e4-5c62-490e-95c4-79446f31017c' definition: '517c4f6f-5437-4392-a70d-4f15ec5be2f0' - version: 1.0.200 + version: 1.0.201 downloadPath: $(Build.BinariesDirectory)/deps # The private ADO project @@ -22,7 +22,7 @@ steps: packageType: upack feed: '/4c7631f5-24c0-4307-8822-1aa8f180c325' definition: 'fd9dd5ad-b73e-4678-890e-edcf680dbc1a' - version: 1.0.200 + version: 1.0.201 downloadPath: $(Build.BinariesDirectory)/deps # You can add more ADO accounts at here. diff --git a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml index 080079388a76c..ab31e592d7d71 100644 --- a/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml +++ b/tools/ci_build/github/azure-pipelines/templates/mac-cpu-packaging-pipeline.yml @@ -68,9 +68,6 @@ stages: jobs: - job: MacOS_C_API_Package_Publish pool: - ${{ if eq(parameters.DoESRP, true)}}: - vmImage: 'macOS-12' - ${{ else }}: vmImage: 'macOS-13' steps: - checkout: none diff --git a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml index d8ea1c35c89c4..29c5f6bb34d7a 100644 --- a/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml +++ b/tools/ci_build/github/azure-pipelines/templates/react-native-ci.yml @@ -261,8 +261,6 @@ stages: publishJUnitResults: true testResultsFiles: '**/TEST-*.xml' testRunTitle: 'React Native Android Instrumented Test results' - javaHomeOption: 'path' - jdkDirectory: '$(JAVA_HOME_11_X64)' sonarQubeRunAnalysis: false spotBugsAnalysis: false displayName: Run React Native Android Instrumented Tests
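Note on the WebNN changes above: GetZeroConstant and CreateOrGetScalarConstant are folded into the single cached CreateOrGetConstant helper. The following is a minimal usage sketch from an op builder's point of view, not part of the patch itself; the epsilon value and the zero_point_shape variable are illustrative assumptions.

// Hedged sketch, assuming a webnn::ModelBuilder& named model_builder and a
// std::vector<uint32_t> zero_point_shape are already in scope.
// Scalar float constant (the old CreateOrGetScalarConstant use case):
emscripten::val epsilon_operand =
    model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_FLOAT, 1e-5f);
// Zero-filled constant with an explicit shape (the old GetZeroConstant use case):
emscripten::val zero_point =
    model_builder.CreateOrGetConstant(ONNX_NAMESPACE::TensorProto_DataType_UINT8, 0, zero_point_shape);
// Operands are cached under a name derived from (data type, value, shape), so
// repeated calls return the same MLOperand instead of allocating duplicates.

Because the zero-fill is handled inside CreateOrGetConstant, callers such as the QDQ builder can pass 0 for any supported data type, which is what the qdq_op_builder.cc change above relies on.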