
Commit

Merge branch 'main' of https://github.com/microsoft/onnxruntime into pengwa/mem_improvement
pengwa committed Dec 26, 2023
2 parents 6331293 + 37f7436 commit a3a7ab8
Showing 96 changed files with 2,689 additions and 3,434 deletions.
36 changes: 0 additions & 36 deletions .github/workflows/generated_fake_win_gpu_ci.yml

This file was deleted.

2 changes: 1 addition & 1 deletion .github/workflows/labeler.yml
@@ -7,7 +7,7 @@ jobs:
triage:
runs-on: ubuntu-latest
steps:
- uses: github/issue-labeler@v3.2
- uses: github/issue-labeler@v3.3
with:
repo-token: "${{ secrets.GITHUB_TOKEN }}"
configuration-path: .github/labeler.yml
2 changes: 1 addition & 1 deletion .github/workflows/publish-java-apidocs.yml
@@ -25,7 +25,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Set up JDK 11
uses: actions/setup-java@v3
uses: actions/setup-java@v4
with:
java-version: '11'
distribution: 'adopt'
2 changes: 1 addition & 1 deletion .github/workflows/publish-js-apidocs.yml
@@ -25,7 +25,7 @@ jobs:
steps:
- uses: actions/checkout@v4
- name: Setup Node.js
uses: actions/setup-node@v3
uses: actions/setup-node@v4
with:
node-version: 18
- name: Generate JS docs
4 changes: 2 additions & 2 deletions .github/workflows/windows.yml
@@ -26,7 +26,7 @@ jobs:
python-version: '3.11.x'
architecture: 'x64'

- uses: actions/setup-node@v3
- uses: actions/setup-node@v4
with:
node-version: 18

@@ -49,7 +49,7 @@
- uses: actions/checkout@v4
with:
submodules: true
- uses: conda-incubator/setup-miniconda@v2
- uses: conda-incubator/setup-miniconda@v3
with:
activate-environment: "ort_build"
python-version: 3.8
24 changes: 17 additions & 7 deletions cmake/CMakeLists.txt
@@ -96,6 +96,7 @@ option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to prov
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
option(onnxruntime_USE_LLVM "Build TVM with LLVM" OFF)

cmake_dependent_option(onnxruntime_USE_CUTLASS "Build with cutlass support" ON "onnxruntime_USE_CUDA" OFF)
cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "NOT WIN32; onnxruntime_USE_CUDA" OFF)
option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)

@@ -693,16 +694,20 @@ if (onnxruntime_USE_CUDA)
enable_language(CUDA)
message( STATUS "CMAKE_CUDA_COMPILER_VERSION: ${CMAKE_CUDA_COMPILER_VERSION}")

if (onnxruntime_DISABLE_CONTRIB_OPS)
set(onnxruntime_USE_FLASH_ATTENTION OFF)
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
if (CMAKE_CUDA_COMPILER_VERSION VERSION_LESS 11.6)
message( STATUS "Turn off flash attention since CUDA compiler version < 11.6")
set(onnxruntime_USE_FLASH_ATTENTION OFF)
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
message( STATUS "Turn off cutlass since CUDA compiler version < 11.6")
set(onnxruntime_USE_CUTLASS OFF)
endif()
else()
set(onnxruntime_USE_CUTLASS OFF)
endif()

if (NOT onnxruntime_USE_CUTLASS OR onnxruntime_DISABLE_CONTRIB_OPS)
if (onnxruntime_DISABLE_CONTRIB_OPS)
message( STATUS "Turn off flash attention/memory efficient attention since contrib ops are disabled")
else()
message( STATUS "Turn off flash attention/memory efficient attention since cutlass is not enabled")
endif()
set(onnxruntime_USE_FLASH_ATTENTION OFF)
set(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION OFF)
endif()
@@ -887,6 +892,11 @@ function(onnxruntime_set_compile_flags target_name)
if (onnxruntime_ENABLE_ATEN)
target_compile_definitions(${target_name} PRIVATE ENABLE_ATEN)
endif()

if (onnxruntime_USE_CUTLASS)
target_compile_definitions(${target_name} PRIVATE USE_CUTLASS)
endif()

set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR ON)
if (onnxruntime_USE_CUDA)
# Suppress a "conversion_function_not_usable" warning in gsl/span
2 changes: 1 addition & 1 deletion cmake/external/cutlass.cmake
@@ -1,4 +1,4 @@
if (onnxruntime_USE_FLASH_ATTENTION OR onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION)
if (onnxruntime_USE_CUTLASS)
include(FetchContent)
FetchContent_Declare(
cutlass
64 changes: 43 additions & 21 deletions csharp/OnnxRuntime.CSharp.proj
@@ -20,6 +20,8 @@ CMake creates a target to this project
<IsWindowsBuild Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Windows)))' == 'true'">true</IsWindowsBuild>
<IsLinuxBuild Condition="'$([System.Runtime.InteropServices.RuntimeInformation]::IsOSPlatform($([System.Runtime.InteropServices.OSPlatform]::Linux)))' == 'true'">true</IsLinuxBuild>
<ExecutionProvider Condition=" '$(ExecutionProvider)' == '' ">None</ExecutionProvider>
<IsPlatformSpecificSubPackage Condition=" '$(IsPlatformSpecificSubPackage)' == '' ">false</IsPlatformSpecificSubPackage>
<NuSpecName Condition=" '$(NuSpecName)' == '' ">NativeNuget.nuspec</NuSpecName>

<!-- include Xamarin/MAUI Android and iOS target frameworks? Command line property overrides this value. -->
<IncludeMobileTargets>true</IncludeMobileTargets>
@@ -45,6 +47,13 @@ CMake creates a target to this project
<PythonExe>python</PythonExe>
</PropertyGroup>

<PropertyGroup>
<!-- If we create multiple nuget packages in one job, the major package and dependent packages should share the same version -->
<!-- CurrentDate and CurrentTime are only used for dev packages -->
<CurrentDate Condition="'$(CurrentDate)'==''">$([System.DateTime]::UtcNow.ToString(yyyyMMdd))</CurrentDate>
<CurrentTime Condition="'$(CurrentTime)'==''">$([System.DateTime]::UtcNow.ToString(hhmm))</CurrentTime>
</PropertyGroup>

<Import Project="$(MSBuildToolsPath)\Microsoft.CSharp.targets" />

<Target Name="Build">
@@ -58,7 +67,7 @@ CMake creates a target to this project
</Exec>
</Target>

<Target Name="ObtainPackageVersion" BeforeTargets="Build;CreatePackage;CreateWindowsAIPackage">
<Target Name="ObtainPackageVersion" BeforeTargets="Build;CreatePackage;CreateWindowsAIPackage;CreateNativePackage">
<ReadLinesFromFile File="..\VERSION_NUMBER">
<Output TaskParameter="Lines" ItemName="MajorVersionNumber"/>
</ReadLinesFromFile>
@@ -81,8 +90,6 @@ CMake creates a target to this project
<Output TaskParameter="AbsolutePaths" PropertyName="OnnxRuntimeSourceDirectoryAbs"/>
</ConvertToAbsolutePath>
<PropertyGroup>
<CurrentDate>$([System.DateTime]::UtcNow.ToString(yyyyMMdd))</CurrentDate>
<CurrentTime>$([System.DateTime]::UtcNow.ToString(hhmm))</CurrentTime>
<PackageVersion>@(MajorVersionNumber)</PackageVersion>
<PackageVersion>$(PackageVersion)$(ReleaseVersionSuffix)</PackageVersion>
<Version>$(PackageVersion)</Version>
@@ -91,23 +98,10 @@ CMake creates a target to this project
<Message Importance="High" Text="PackageVersion=$(PackageVersion)" />
</Target>

<Target Name="CreatePackage">
<!-- Validate no missing UnmanagedFunctionPointer attributes on delegates. -->
<Exec Condition="'$(IsWindowsBuild)' == 'true' AND $(OrtPackageId) == 'Microsoft.ML.OnnxRuntime'"
Command="$(PythonExe) tools/ValidateNativeDelegateAttributes.py"
ContinueOnError="False">
<Output TaskParameter="ConsoleOutput" PropertyName="GenerateNuspecOutput" />
</Exec>

<!-- Create Microsoft.ML.OnnxRuntime.Managed with the C# bindings using the C# project -->
<Message Importance="High" Text="Creating Microsoft.ML.OnnxRuntime.Managed nuget package..." />
<MSBuild Projects="src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj"
Targets="CopyMiscFiles;Pack"
Properties="NoBuild=true;Platform=AnyCPU;PackageVersion=$(PackageVersion);OrtPackageId=$(OrtPackageId);IncludeMobileTargets=$(IncludeMobileTargets)"/>

<!-- Manually create the nuspec for the native Microsoft.ML.OnnxRuntime package -->
<Message Importance="High" Text="Generating nuspec for the native Microsoft.ML.OnnxRuntime nuget package..." />
<Exec Command="$(PythonExe) $(GenerateNuspecScript) --package_version $(PackageVersion) --package_name $(OrtPackageId) --target_architecture $(TargetArchitecture) --build_config $(Configuration) --native_build_path $(NativeBuildOutputDirAbs) --packages_path $(OnnxRuntimePackagesDirectoryAbs) --ort_build_path $(OnnxRuntimeBuildDirectoryAbs) --sources_path $(OnnxRuntimeSourceDirectoryAbs) --commit_id $(GitCommitHash) --is_release_build $(IsReleaseBuild) --execution_provider $(ExecutionProvider)"
<Target Name="CreateNativePackage">
<!-- Create the nuspec for the native OnnxRuntime package -->
<Message Importance="High" Text="Generating nuspec for the native $(OrtPackageId) nuget package..." />
<Exec Command="$(PythonExe) $(GenerateNuspecScript) --package_version $(PackageVersion) --package_name $(OrtPackageId) --target_architecture $(TargetArchitecture) --build_config $(Configuration) --native_build_path $(NativeBuildOutputDirAbs) --packages_path $(OnnxRuntimePackagesDirectoryAbs) --ort_build_path $(OnnxRuntimeBuildDirectoryAbs) --sources_path $(OnnxRuntimeSourceDirectoryAbs) --commit_id $(GitCommitHash) --is_release_build $(IsReleaseBuild) --execution_provider $(ExecutionProvider) --nuspec_name $(NuSpecName)"
ContinueOnError="False"
ConsoleToMSBuild="true">
<Output TaskParameter="ConsoleOutput" PropertyName="GenerateNuspecOutput" />
@@ -116,7 +110,7 @@ CMake creates a target to this project
<!-- run `nuget pack` on Windows or `dotnet pack` on Linux to create the native nupkg -->
<Message Importance="High" Text="Bundling native shared library artifacts into Microsoft.ML.OnnxRuntime nuget package..." />
<Exec Condition=" '$(IsWindowsBuild)' == 'true'"
Command="$(NugetExe) pack NativeNuget.nuspec"
Command="$(NugetExe) pack $(NuSpecName)"
WorkingDirectory="$(NativeBuildOutputDirAbs)"
ContinueOnError="False"
ConsoleToMSBuild="true">
@@ -144,6 +138,34 @@ CMake creates a target to this project
/>
</Target>

<Target Name="CreatePackage">
<!-- Validate no missing UnmanagedFunctionPointer attributes on delegates. -->
<Exec Condition="'$(IsWindowsBuild)' == 'true' AND $(OrtPackageId) == 'Microsoft.ML.OnnxRuntime'"
Command="$(PythonExe) tools/ValidateNativeDelegateAttributes.py"
ContinueOnError="False">
<Output TaskParameter="ConsoleOutput" PropertyName="GenerateNuspecOutput" />
</Exec>

<!-- Create Microsoft.ML.OnnxRuntime.Managed with the C# bindings using the C# project -->
<Message Condition="'$(IsPlatformSpecificSubPackage)'!='True'" Importance="High" Text="Creating Microsoft.ML.OnnxRuntime.Managed nuget package..." />
<MSBuild Condition="'$(IsPlatformSpecificSubPackage)'!='True'" Projects="src\Microsoft.ML.OnnxRuntime\Microsoft.ML.OnnxRuntime.csproj"
Targets="CopyMiscFiles;Pack"
Properties="NoBuild=true;Platform=AnyCPU;PackageVersion=$(PackageVersion);OrtPackageId=$(OrtPackageId);IncludeMobileTargets=$(IncludeMobileTargets)"/>

<MSBuild Projects ="$(MSBuildProjectFullPath)"
Targets ="CreateNativePackage" />

</Target>

<Target Name="CreateAllGpuPackages" AfterTargets="CreatePackage" Condition="'$(OrtPackageId)' == 'Microsoft.ML.OnnxRuntime.Gpu'">
<MSBuild Projects ="$(MSBuildProjectFullPath)"
Properties="NuSpecName=NativeGpuWinNuget.nuspec;OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu.Windows"
Targets ="CreateNativePackage" />
<MSBuild Projects ="$(MSBuildProjectFullPath)"
Properties="NuSpecName=NativeGpuLinuxNuget.nuspec;OrtPackageId=Microsoft.ML.OnnxRuntime.Gpu.Linux"
Targets ="CreateNativePackage" />
</Target>

<ItemGroup>
<LicenseFile Include="$(OnnxRuntimeSourceDirectory)\LICENSE"/>
</ItemGroup>

@@ -37,7 +37,7 @@
<PropertyGroup>
<EnginePlatform Condition="'$(Platform)' == 'Win32'">x86</EnginePlatform>
<EnginePlatform Condition="'$(Platform)' == 'ARM64'">arm64</EnginePlatform>
<EnginePlatform Condition="'$(Platform)' == 'ARM'">arm</EnginePlatform>
<EnginePlatform Condition="'$(Platform)' == 'ARM'">arm</EnginePlatform>
<EnginePlatform Condition="'$(Platform)' != 'Win32' AND '$(Platform)' != 'ARM64'">$(Platform)</EnginePlatform>
</PropertyGroup>

@@ -55,7 +55,8 @@
<ItemGroup>
<!-- x64 -->
<None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\onnxruntime.dll"
Condition="'$(PlatformTarget)' == 'x64' OR ('$(PlatformTarget)' == 'AnyCPU' AND '$(Prefer32Bit)' != 'true')">
Condition="('$(PlatformTarget)' == 'x64' OR ('$(PlatformTarget)' == 'AnyCPU' AND '$(Prefer32Bit)' != 'true')) AND
Exists('$(MSBuildThisFileDirectory)..\..\runtimes\win-x64\native\onnxruntime.dll')">
<Link>onnxruntime.dll</Link>
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Visible>false</Visible>
@@ -146,8 +147,8 @@
<CopyToOutputDirectory>PreserveNewest</CopyToOutputDirectory>
<Visible>false</Visible>
</None>
<!-- x86 -->

<!-- x86 -->
<None Include="$(MSBuildThisFileDirectory)..\..\runtimes\win-x86\native\onnxruntime.dll"
Condition="('$(PlatformTarget)' == 'x86' OR ('$(PlatformTarget)' == 'AnyCPU' AND '$(Prefer32Bit)' == 'true'))">
<Link>onnxruntime.dll</Link>

@@ -52,9 +52,12 @@ IF NOT errorlevel 0 (
%dn% list test\Microsoft.ML.OnnxRuntime.EndToEndTests\Microsoft.ML.OnnxRuntime.EndToEndTests.csproj package
dir test\Microsoft.ML.OnnxRuntime.EndToEndTests\packages\

IF "%PACKAGENAME%"=="Microsoft.ML.OnnxRuntime.Gpu" (
set gpu_package=F
IF "%PACKAGENAME%"=="Microsoft.ML.OnnxRuntime.Gpu" set gpu_package=T
IF "%PACKAGENAME%"=="Microsoft.ML.OnnxRuntime.Gpu.Windows" set gpu_package=T
IF "%gpu_package%"=="T" (
set TESTONGPU=ON
%dn% test -p:DefineConstants=USE_TENSORRT test\Microsoft.ML.OnnxRuntime.EndToEndTests\Microsoft.ML.OnnxRuntime.EndToEndTests.csproj --no-restore --filter TensorRT
%dn% test -p:DefineConstants=USE_TENSORRT test\Microsoft.ML.OnnxRuntime.EndToEndTests\Microsoft.ML.OnnxRuntime.EndToEndTests.csproj --no-restore --filter TensorRT

IF NOT errorlevel 0 (
@echo "Failed to build or execute the end-to-end test"

@@ -32,7 +32,7 @@ if [ $RunTestCsharp = "true" ]; then
exit 1
fi

if [ $PACKAGENAME = "Microsoft.ML.OnnxRuntime.Gpu" ]; then
if [ $PACKAGENAME = "Microsoft.ML.OnnxRuntime.Gpu" ] || [ $PACKAGENAME = "Microsoft.ML.OnnxRuntime.Gpu.Linux" ]; then
export TESTONGPU=ON
dotnet test -p:DefineConstants=USE_CUDA $BUILD_SOURCESDIRECTORY/csharp/test/Microsoft.ML.OnnxRuntime.EndToEndTests/Microsoft.ML.OnnxRuntime.EndToEndTests.csproj --no-restore --verbosity detailed
if [ $? -ne 0 ]; then
2 changes: 1 addition & 1 deletion js/common/lib/backend-impl.ts
@@ -82,7 +82,7 @@ export const resolveBackend = async(backendHints: readonly string[]): Promise<Ba
const isInitializing = !!backendInfo.initPromise;
try {
if (!isInitializing) {
backendInfo.initPromise = backendInfo.backend.init();
backendInfo.initPromise = backendInfo.backend.init(backendName);
}
await backendInfo.initPromise;
backendInfo.initialized = true;
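For context: the one-line change above threads the resolved backend name into init(). A minimal TypeScript sketch of the once-per-name caching this line lives in (the ExampleBackendInfo shape, registered map, and initOnce helper are illustrative names, not the library's exact internals):

    interface ExampleBackendInfo {
      backend: {init(backendName: string): Promise<void>};
      initPromise?: Promise<void>;
      initialized?: boolean;
    }

    const registered = new Map<string, ExampleBackendInfo>();

    async function initOnce(backendName: string): Promise<void> {
      const info = registered.get(backendName);
      if (!info) {
        throw new Error(`backend '${backendName}' is not registered`);
      }
      if (info.initialized) {
        return;
      }
      if (!info.initPromise) {
        // The first caller starts init(backendName); concurrent callers
        // await the same promise, so each name is initialized at most once.
        info.initPromise = info.backend.init(backendName);
      }
      await info.initPromise;
      info.initialized = true;
    }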
2 changes: 1 addition & 1 deletion js/common/lib/backend.ts
@@ -71,7 +71,7 @@ export interface Backend {
/**
* Initialize the backend asynchronously. Should throw when failed.
*/
init(): Promise<void>;
init(backendName: string): Promise<void>;

createInferenceSessionHandler(uriOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
Promise<InferenceSessionHandler>;
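Because one Backend object can be registered under several names, an implementation can now branch on the name it is initialized for. A hedged sketch against the interface above (the WebGPU probe is an illustrative example, not the library's implementation):

    const exampleBackend = {
      // Branch on the registered name; throw on failure, per the contract above.
      async init(backendName: string): Promise<void> {
        if (backendName === 'webgpu' &&
            (typeof navigator === 'undefined' || !navigator.gpu)) {
          throw new Error('WebGPU is not available.');
        }
        // ... name-agnostic setup ...
      },
    };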
17 changes: 14 additions & 3 deletions js/web/lib/backend-wasm.ts
@@ -4,7 +4,7 @@
import {cpus} from 'node:os';
import {Backend, env, InferenceSession, InferenceSessionHandler} from 'onnxruntime-common';

import {initializeWebAssemblyInstance} from './wasm/proxy-wrapper';
import {initializeOrtEp, initializeWebAssemblyAndOrtRuntime} from './wasm/proxy-wrapper';
import {OnnxruntimeWebAssemblySessionHandler} from './wasm/session-handler-inference';

/**
@@ -33,12 +33,23 @@ export const initializeFlags = (): void => {
};

export class OnnxruntimeWebAssemblyBackend implements Backend {
async init(): Promise<void> {
/**
* This function initializes the WebAssembly backend.
*
* This function will be called only once for each backend name. It will be called the first time when
* `ort.InferenceSession.create()` is called with a registered backend name.
*
* @param backendName - the registered backend name.
*/
async init(backendName: string): Promise<void> {
// populate wasm flags
initializeFlags();

// init wasm
await initializeWebAssemblyInstance();
await initializeWebAssemblyAndOrtRuntime();

// perform EP-specific initialization
await initializeOrtEp(backendName);
}
createInferenceSessionHandler(path: string, options?: InferenceSession.SessionOptions):
Promise<InferenceSessionHandler>;
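A usage sketch of how the two-phase init above is reached (the model path is hypothetical): the wasm/ORT runtime is set up once, then initializeOrtEp runs for the specific backend name the session asked for.

    import * as ort from 'onnxruntime-web';

    // The first create() for a given backend name resolves that backend and
    // runs its init('webgpu') exactly once; later sessions reuse the result.
    const session = await ort.InferenceSession.create('model.onnx', {
      executionProviders: ['webgpu'],
    });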
2 changes: 1 addition & 1 deletion js/web/lib/index.ts
@@ -21,7 +21,7 @@ if (!BUILD_DEFS.DISABLE_WEBGL) {
if (!BUILD_DEFS.DISABLE_WASM) {
const wasmBackend = BUILD_DEFS.DISABLE_TRAINING ? require('./backend-wasm-inference').wasmBackend :
require('./backend-wasm-training').wasmBackend;
if (!BUILD_DEFS.DISABLE_WEBGPU && typeof navigator !== 'undefined' && navigator.gpu) {
if (!BUILD_DEFS.DISABLE_WEBGPU) {
registerBackend('webgpu', wasmBackend, 5);
}
registerBackend('cpu', wasmBackend, 10);
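With the navigator.gpu guard removed, 'webgpu' is registered even in environments without WebGPU, and unavailability now surfaces when the backend's init('webgpu') throws during session creation. A hedged fallback sketch, continuing the import from the previous sketch (error handling is illustrative):

    try {
      await ort.InferenceSession.create('model.onnx', {executionProviders: ['webgpu']});
    } catch {
      // init('webgpu') threw (e.g. no WebGPU in this browser); fall back to wasm.
      await ort.InferenceSession.create('model.onnx', {executionProviders: ['wasm']});
    }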
12 changes: 1 addition & 11 deletions js/web/lib/wasm/jsep/backend-webgpu.ts
@@ -144,17 +144,7 @@ export class WebGpuBackend {
*/
sessionExternalDataMapping: Map<number, Map<number, [number, GPUBuffer]>> = new Map();

async initialize(env: Env): Promise<void> {
if (!navigator.gpu) {
// WebGPU is not available.
throw new Error('WebGpuBackend: WebGPU is not available.');
}

const adapter = await navigator.gpu.requestAdapter();
if (!adapter) {
throw new Error('WebGpuBackend: Failed to get GPU adapter.');
}

async initialize(env: Env, adapter: GPUAdapter): Promise<void> {
this.env = env;
const requiredFeatures: GPUFeatureName[] = [];
const deviceDescriptor: GPUDeviceDescriptor = {
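The adapter probing deleted above moves to initialize()'s caller, which is not shown in this diff. A hedged sketch of what that calling side must now do, reusing the error messages from the removed lines (backend and env stand in for the real objects):

    declare const backend: {initialize(env: unknown, adapter: GPUAdapter): Promise<void>};
    declare const env: unknown;

    if (typeof navigator === 'undefined' || !navigator.gpu) {
      throw new Error('WebGpuBackend: WebGPU is not available.');
    }
    // Acquire the adapter up front so initialize() can assume it is valid.
    const adapter = await navigator.gpu.requestAdapter();
    if (!adapter) {
      throw new Error('WebGpuBackend: Failed to get GPU adapter.');
    }
    await backend.initialize(env, adapter);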