Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into user/linneamay/resize-18
Browse files Browse the repository at this point in the history
  • Loading branch information
Linnea May committed Jan 11, 2024
2 parents 1d42e46 + 574c7ca commit d760cdf
Show file tree
Hide file tree
Showing 40 changed files with 1,727 additions and 804 deletions.
66 changes: 3 additions & 63 deletions .pipelines/OneBranch.Nuget-WindowsAI-Pipeline.Official.yml
Original file line number Diff line number Diff line change
Expand Up @@ -53,10 +53,6 @@ extends:
BuildArch: x86
PythonPackageName: pythonx86

- template: .pipelines/windowsai-steps.yml@self
parameters:
BuildArch: arm

- template: .pipelines/windowsai-steps.yml@self
parameters:
BuildArch: arm64
Expand All @@ -72,11 +68,6 @@ extends:
PythonPackageName: pythonx86
Runtime: static

- template: .pipelines/windowsai-steps.yml@self
parameters:
BuildArch: arm
Runtime: static

- template: .pipelines/windowsai-steps.yml@self
parameters:
BuildArch: arm64
Expand All @@ -94,11 +85,9 @@ extends:
dependsOn:
- Windows_Packaging_x64_dynamic
- Windows_Packaging_x86_dynamic
- Windows_Packaging_arm_dynamic
- Windows_Packaging_arm64_dynamic
- Windows_Packaging_x64_static
- Windows_Packaging_x86_static
- Windows_Packaging_arm_static
- Windows_Packaging_arm64_static
condition: succeeded()
steps:
Expand All @@ -120,12 +109,6 @@ extends:
artifactName: 'drop_Windows_Build_Windows_Packaging_arm64_dynamic'
targetPath: '$(Build.BinariesDirectory)/nuget-artifact-arm64'

- task: DownloadPipelineArtifact@0
displayName: 'Download Pipeline Artifact - NuGet DirectML arm'
inputs:
artifactName: 'drop_Windows_Build_Windows_Packaging_arm_dynamic'
targetPath: '$(Build.BinariesDirectory)/nuget-artifact-arm'

- task: DownloadPipelineArtifact@0
displayName: 'Download Pipeline Artifact - NuGet DirectML x64 StaticRuntime'
inputs:
Expand All @@ -144,12 +127,6 @@ extends:
artifactName: 'drop_Windows_Build_Windows_Packaging_arm64_static'
targetPath: '$(Build.BinariesDirectory)/nuget-artifact-arm64-static-runtime'

- task: DownloadPipelineArtifact@0
displayName: 'Download Pipeline Artifact - NuGet DirectML arm StaticRuntime'
inputs:
artifactName: 'drop_Windows_Build_Windows_Packaging_arm_static'
targetPath: '$(Build.BinariesDirectory)/nuget-artifact-arm-static-runtime'

- task: PowerShell@2
displayName: 'Bundle NuGet and other binaries'
inputs:
Expand Down Expand Up @@ -194,17 +171,7 @@ extends:
$arm64_static_runtime_nupkg_unzipped_directory = [System.IO.Path]::Combine($arm64_static_runtime_nupkg_unzipped_directory_root, 'binaries', [System.IO.Path]::GetFileNameWithoutExtension($arm64_static_runtime_nuget_package))
[System.IO.Compression.ZipFile]::ExtractToDirectory($arm64_static_runtime_nuget_package, $arm64_static_runtime_nupkg_unzipped_directory)
$nupkgs = (Get-ChildItem ..\nuget-artifact-arm -Filter Microsoft.AI.MachineLearning*.nupkg -Recurse)
$arm_nuget_package = $nupkgs[0].FullName
$arm_nupkg_unzipped_directory_root = $nupkgs[0].Directory.FullName
$arm_nupkg_unzipped_directory = [System.IO.Path]::Combine($arm_nupkg_unzipped_directory_root, 'binaries', [System.IO.Path]::GetFileNameWithoutExtension($arm_nuget_package))
[System.IO.Compression.ZipFile]::ExtractToDirectory($arm_nuget_package, $arm_nupkg_unzipped_directory)
$nupkgs = (Get-ChildItem ..\nuget-artifact-arm-static-runtime -Filter Microsoft.AI.MachineLearning*.nupkg -Recurse)
$arm_static_runtime_nuget_package = $nupkgs[0].FullName
$arm_static_runtime_nupkg_unzipped_directory_root = $nupkgs[0].Directory.FullName
$arm_static_runtime_nupkg_unzipped_directory = [System.IO.Path]::Combine($arm_static_runtime_nupkg_unzipped_directory_root, 'binaries', [System.IO.Path]::GetFileNameWithoutExtension($arm_static_runtime_nuget_package))
[System.IO.Compression.ZipFile]::ExtractToDirectory($arm_static_runtime_nuget_package, $arm_static_runtime_nupkg_unzipped_directory)
$x64_static_runtime_path_old = [System.IO.Path]::Combine($x64_static_runtime_nupkg_unzipped_directory, 'runtimes', 'win-x64', '_native')
$x64_static_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-x64', '_native', 'static')
Expand All @@ -216,10 +183,7 @@ extends:
$arm64_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-arm64', '_native')
$arm64_static_runtime_path_old = [System.IO.Path]::Combine($arm64_static_runtime_nupkg_unzipped_directory, 'runtimes', 'win-arm64', '_native')
$arm64_static_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-arm64', '_native', 'static')
$arm_runtime_path_old = [System.IO.Path]::Combine($arm_nupkg_unzipped_directory, 'runtimes', 'win-arm', '_native')
$arm_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-arm', '_native')
$arm_static_runtime_path_old = [System.IO.Path]::Combine($arm_static_runtime_nupkg_unzipped_directory, 'runtimes', 'win-arm', '_native')
$arm_static_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-arm', '_native', 'static')
$uap_build_path_old = [System.IO.Path]::Combine($x64_static_runtime_nupkg_unzipped_directory, 'build', 'native')
$uap_build_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'build', 'uap10.0')
Expand All @@ -228,8 +192,6 @@ extends:
New-Item -Path $x86_static_runtime_path_new -ItemType Directory
New-Item -Path $arm64_runtime_path_new -ItemType Directory
New-Item -Path $arm64_static_runtime_path_new -ItemType Directory
New-Item -Path $arm_runtime_path_new -ItemType Directory
New-Item -Path $arm_static_runtime_path_new -ItemType Directory
Copy-Item ([System.IO.Path]::Combine($x86_runtime_path_old, 'onnxruntime.dll')) $x86_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($x86_runtime_path_old, 'onnxruntime.lib')) $x86_runtime_path_new
Expand All @@ -241,11 +203,6 @@ extends:
Copy-Item ([System.IO.Path]::Combine($arm64_runtime_path_old, 'microsoft.ai.machinelearning.dll')) $arm64_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm64_runtime_path_old, 'microsoft.ai.machinelearning.lib')) $arm64_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm_runtime_path_old, 'onnxruntime.dll')) $arm_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm_runtime_path_old, 'onnxruntime.lib')) $arm_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm_runtime_path_old, 'microsoft.ai.machinelearning.dll')) $arm_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm_runtime_path_old, 'microsoft.ai.machinelearning.lib')) $arm_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($x64_static_runtime_path_old, 'onnxruntime.dll')) ([System.IO.Path]::Combine($x64_static_runtime_path_new, 'onnxruntime.dll'))
Copy-Item ([System.IO.Path]::Combine($x64_static_runtime_path_old, 'onnxruntime.lib')) ([System.IO.Path]::Combine($x64_static_runtime_path_new, 'onnxruntime.lib'))
Copy-Item ([System.IO.Path]::Combine($x64_static_runtime_path_old, 'microsoft.ai.machinelearning.dll')) ([System.IO.Path]::Combine($x64_static_runtime_path_new, 'microsoft.ai.machinelearning.dll'))
Expand All @@ -261,11 +218,6 @@ extends:
Copy-Item ([System.IO.Path]::Combine($arm64_static_runtime_path_old, 'microsoft.ai.machinelearning.dll')) ([System.IO.Path]::Combine($arm64_static_runtime_path_new, 'microsoft.ai.machinelearning.dll'))
Copy-Item ([System.IO.Path]::Combine($arm64_static_runtime_path_old, 'microsoft.ai.machinelearning.lib')) ([System.IO.Path]::Combine($arm64_static_runtime_path_new, 'microsoft.ai.machinelearning.lib'))
Copy-Item ([System.IO.Path]::Combine($arm_static_runtime_path_old, 'onnxruntime.dll')) ([System.IO.Path]::Combine($arm_static_runtime_path_new, 'onnxruntime.dll'))
Copy-Item ([System.IO.Path]::Combine($arm_static_runtime_path_old, 'onnxruntime.lib')) ([System.IO.Path]::Combine($arm_static_runtime_path_new, 'onnxruntime.lib'))
Copy-Item ([System.IO.Path]::Combine($arm_static_runtime_path_old, 'microsoft.ai.machinelearning.dll')) ([System.IO.Path]::Combine($arm_static_runtime_path_new, 'microsoft.ai.machinelearning.dll'))
Copy-Item ([System.IO.Path]::Combine($arm_static_runtime_path_old, 'microsoft.ai.machinelearning.lib')) ([System.IO.Path]::Combine($arm_static_runtime_path_new, 'microsoft.ai.machinelearning.lib'))
Copy-Item -Recurse $uap_build_path_old $uap_build_path_new
$merged_nuget_path = [System.IO.Path]::Combine($Env:BUILD_ARTIFACTSTAGINGDIRECTORY, 'merged')
Expand Down Expand Up @@ -304,32 +256,20 @@ extends:
$arm64_nupkg_unzipped_directory = [System.IO.Path]::Combine($arm64_nupkg_unzipped_directory_root, 'symbols', [System.IO.Path]::GetFileNameWithoutExtension($arm64_nuget_package))
[System.IO.Compression.ZipFile]::ExtractToDirectory($arm64_nuget_package, $arm64_nupkg_unzipped_directory)
$nupkgs = (Get-ChildItem ..\nuget-artifact-arm -Filter Microsoft.AI.MachineLearning*.snupkg -Recurse)
$arm_nuget_package = $nupkgs[0].FullName
$arm_nupkg_unzipped_directory_root = $nupkgs[0].Directory.FullName
$arm_nupkg_unzipped_directory = [System.IO.Path]::Combine($arm_nupkg_unzipped_directory_root, 'symbols', [System.IO.Path]::GetFileNameWithoutExtension($arm_nuget_package))
[System.IO.Compression.ZipFile]::ExtractToDirectory($arm_nuget_package, $arm_nupkg_unzipped_directory)
$x86_runtime_path_old = [System.IO.Path]::Combine($x86_nupkg_unzipped_directory, 'runtimes', 'win-x86', '_native')
$x86_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-x86', '_native')
$arm64_runtime_path_old = [System.IO.Path]::Combine($arm64_nupkg_unzipped_directory, 'runtimes', 'win-arm64', '_native')
$arm64_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-arm64', '_native')
$arm_runtime_path_old = [System.IO.Path]::Combine($arm_nupkg_unzipped_directory, 'runtimes', 'win-arm', '_native')
$arm_runtime_path_new = [System.IO.Path]::Combine($x64_nupkg_unzipped_directory, 'runtimes', 'win-arm', '_native')
New-Item -Path $x86_runtime_path_new -ItemType Directory
New-Item -Path $arm64_runtime_path_new -ItemType Directory
New-Item -Path $arm_runtime_path_new -ItemType Directory
Copy-Item ([System.IO.Path]::Combine($x86_runtime_path_old, 'onnxruntime.pdb')) $x86_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($x86_runtime_path_old, 'microsoft.ai.machinelearning.pdb')) $x86_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm64_runtime_path_old, 'onnxruntime.pdb')) $arm64_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm64_runtime_path_old, 'microsoft.ai.machinelearning.pdb')) $arm64_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm_runtime_path_old, 'onnxruntime.pdb')) $arm_runtime_path_new
Copy-Item ([System.IO.Path]::Combine($arm_runtime_path_old, 'microsoft.ai.machinelearning.pdb')) $arm_runtime_path_new
$merged_nuget_path = [System.IO.Path]::Combine($Env:BUILD_ARTIFACTSTAGINGDIRECTORY, 'merged')
if (!(Test-Path $merged_nuget_path)) {
New-Item -Path $merged_nuget_path -ItemType Directory
Expand Down
27 changes: 19 additions & 8 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -354,13 +354,7 @@ if (onnxruntime_USE_ROCM)
endif()
endif()

if (APPLE)
if (NOT CMAKE_OSX_ARCHITECTURES)
message("Building ONNX Runtime for ${CMAKE_HOST_SYSTEM_PROCESSOR}")
endif()
elseif (NOT WIN32 AND NOT APPLE)
message("Building ONNX Runtime for ${CMAKE_SYSTEM_PROCESSOR}")
endif()


# Single output director for all binaries
set(RUNTIME_OUTPUT_DIRECTORY ${CMAKE_BINARY_DIR}/bin CACHE PATH "Single output directory for all binaries.")
Expand Down Expand Up @@ -493,6 +487,14 @@ endif()

include(adjust_global_compile_flags.cmake)

if (APPLE)
if (NOT CMAKE_OSX_ARCHITECTURES)
message("Building ONNX Runtime for ${CMAKE_HOST_SYSTEM_PROCESSOR} CPU ARCH")
endif()
elseif (NOT WIN32 AND NOT APPLE)
message("Building ONNX Runtime for ${onnxruntime_target_platform} CPU ARCH")
endif()

# We need to link with libatomic on systems that do not have built-in atomics, or
# don't have built-in support for 8 byte atomics
# Derived from https://github.com/protocolbuffers/protobuf/blob/master/cmake/CMakeLists.txt
Expand Down Expand Up @@ -639,7 +641,16 @@ else()
check_cxx_compiler_flag(-Wunused-variable HAS_UNUSED_VARIABLE)
check_cxx_compiler_flag(-Wuseless-cast HAS_USELESS_CAST)
check_function_exists(reallocarray HAS_REALLOCARRAY)

if (NOT APPLE AND NOT CMAKE_SYSTEM_NAME STREQUAL "Emscripten" AND onnxruntime_target_platform STREQUAL "aarch64")
check_cxx_compiler_flag(-march=armv8.2-a+bf16 HAS_ARM64_BFLOAT16)
if(NOT HAS_ARM64_BFLOAT16)
message(FATAL_ERROR "The compiler doesn't support BFLOAT16!!!")
endif()
check_cxx_compiler_flag(-march=armv8.2-a+fp16 HAS_ARM64_FLOAT16)
if(NOT HAS_ARM64_FLOAT16)
message(FATAL_ERROR "The compiler doesn't support FLOAT16!!!")
endif()
endif()
if (HAS_TAUTOLOGICAL_POINTER_COMPARE)
#we may have extra null pointer checkings in debug build, it's not an issue
list(APPEND ORT_WARNING_FLAGS -Wno-tautological-pointer-compare)
Expand Down
25 changes: 25 additions & 0 deletions cmake/adjust_global_compile_flags.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -300,6 +300,31 @@ if (MSVC)
endif()
else()
if (NOT APPLE)
#XXX: Sometimes the value of CMAKE_SYSTEM_PROCESSOR is set but it's wrong. For example, if you run an armv7 docker
#image on an aarch64 machine with an aarch64 Ubuntu host OS, in the docker instance cmake may still report
# CMAKE_SYSTEM_PROCESSOR as aarch64 by default. Given compiling this code may need more than 2GB memory, we do not
# support compiling for ARM32 natively(only support cross-compiling), we will ignore this issue for now.
if(NOT CMAKE_SYSTEM_PROCESSOR)
message(WARNING "CMAKE_SYSTEM_PROCESSOR is not set. Please set it in your toolchain cmake file.")
# Try to detect it
if("${CMAKE_C_COMPILER_ID}" STREQUAL "GNU" OR "${CMAKE_C_COMPILER_ID}" STREQUAL "Clang")
execute_process(
COMMAND "${CMAKE_C_COMPILER}" -dumpmachine
OUTPUT_VARIABLE GCC_DUMP_MACHINE_OUT OUTPUT_STRIP_TRAILING_WHITESPACE
ERROR_VARIABLE _err
RESULT_VARIABLE _res
)
if(NOT _res EQUAL 0)
message(SEND_ERROR "Failed to run 'gcc -dumpmachine':\n ${_res}")
endif()
string(REPLACE "-" ";" GCC_DUMP_MACHINE_OUT_LIST "${GCC_DUMP_MACHINE_OUT}")
list(LENGTH GCC_DUMP_MACHINE_OUT_LIST GCC_TRIPLET_LEN)
if(GCC_TRIPLET_LEN EQUAL 4)
list(GET GCC_DUMP_MACHINE_OUT_LIST 0 CMAKE_SYSTEM_PROCESSOR)
message("Setting CMAKE_SYSTEM_PROCESSOR to ${CMAKE_SYSTEM_PROCESSOR}")
endif()
endif()
endif()
set(onnxruntime_target_platform ${CMAKE_SYSTEM_PROCESSOR})
endif()
if (onnxruntime_BUILD_FOR_NATIVE_MACHINE)
Expand Down
4 changes: 4 additions & 0 deletions js/web/lib/build-def.d.ts
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,10 @@ interface BuildDefinitions {
* defines whether to disable the whole WebGpu backend in the build.
*/
readonly DISABLE_WEBGPU: boolean;
/**
* defines whether to disable the whole WebNN backend in the build.
*/
readonly DISABLE_WEBNN: boolean;
/**
* defines whether to disable the whole WebAssembly backend in the build.
*/
Expand Down
4 changes: 3 additions & 1 deletion js/web/lib/index.ts
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,9 @@ if (!BUILD_DEFS.DISABLE_WASM) {
registerBackend('wasm', wasmBackend, 10);
if (BUILD_DEFS.DISABLE_TRAINING) {
registerBackend('xnnpack', wasmBackend, 9);
registerBackend('webnn', wasmBackend, 9);
if (!BUILD_DEFS.DISABLE_WEBNN) {
registerBackend('webnn', wasmBackend, 9);
}
}
}

Expand Down
12 changes: 6 additions & 6 deletions js/web/lib/wasm/jsep/webgpu/op-resolve-rules.ts
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
// Licensed under the MIT License.

import {argMax, argMin, parseArgMinMaxAttributes} from './ops/argminmax';
import {attention, parseAttentionAttributes} from './ops/attention';
import {attention} from './ops/attention';
import {batchNorm} from './ops/batch-norm';
import {biasAdd} from './ops/bias-add';
import {biasSplitGelu} from './ops/bias-split-gelu';
Expand All @@ -17,10 +17,10 @@ import {gather, parseGatherAttributes} from './ops/gather';
import {gatherElements, parseGatherElementsAttributes} from './ops/gather-elements';
import {gemm, parseGemmAttributes} from './ops/gemm';
import {instanceNorm, parseInstanceNormAttributes} from './ops/instance-norm';
import {layerNorm, parseLayerNormAttributes} from './ops/layer-norm';
import {layerNorm} from './ops/layer-norm';
import {matMul} from './ops/matmul';
import {multiHeadAttention, parseMultiHeadAttentionAttributes} from './ops/multi-head-attentiion';
import {pad, parsePadAttributes} from './ops/pad';
import {pad} from './ops/pad';
import * as pool from './ops/pool';
import {range} from './ops/range';
import {reduceL1, reduceL2, reduceLogSum, reduceLogSumExp, reduceMax, reduceMean, reduceMin, reduceProd, reduceSum, reduceSumSquare} from './ops/reduce';
Expand Down Expand Up @@ -50,7 +50,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
['Asinh', [unaryOps.asinh]],
['Atan', [unaryOps.atan]],
['Atanh', [unaryOps.atanh]],
['Attention', [attention, parseAttentionAttributes]],
['Attention', [attention]],
// TODO: support new attributes for AveragePool-10
['AveragePool', [pool.averagePool, pool.parseAveragePoolAttributes]],
['BatchNormalization', [batchNorm]],
Expand Down Expand Up @@ -83,7 +83,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
['Greater', [binaryOps.greater]],
['GreaterOrEqual', [binaryOps.greaterOrEqual]],
['InstanceNormalization', [instanceNorm, parseInstanceNormAttributes]],
['LayerNormalization', [layerNorm, parseLayerNormAttributes]],
['LayerNormalization', [layerNorm]],
['LeakyRelu', [unaryOps.leakyRelu, unaryOps.parseAlphaAttributes]],
['Less', [binaryOps.less]],
['LessOrEqual', [binaryOps.lessOrEqual]],
Expand All @@ -95,7 +95,7 @@ export const WEBGPU_OP_RESOLVE_RULES: Map<string, OperatorImplementation> = new
['MultiHeadAttention', [multiHeadAttention, parseMultiHeadAttentionAttributes]],
['Neg', [unaryOps.neg]],
['Not', [unaryOps.not]],
['Pad', [pad, parsePadAttributes]],
['Pad', [pad]],
['Pow', [binaryOps.pow]],
['Range', [range]],
['Reciprocal', [unaryOps.reciprocal]],
Expand Down
Loading

0 comments on commit d760cdf

Please sign in to comment.