Commit 8b7d8cd

Merge branch 'main' of https://github.com/microsoft/onnxruntime into bugapi

xadupre committed Dec 21, 2023
2 parents faf3f4f + 914bc40
Showing 122 changed files with 27,306 additions and 3,375 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/labeler.yml
@@ -7,7 +7,7 @@ jobs:
   triage:
     runs-on: ubuntu-latest
     steps:
-      - uses: github/issue-labeler@v3.2
+      - uses: github/issue-labeler@v3.3
         with:
           repo-token: "${{ secrets.GITHUB_TOKEN }}"
           configuration-path: .github/labeler.yml
2 changes: 1 addition & 1 deletion .github/workflows/publish-java-apidocs.yml
@@ -25,7 +25,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Set up JDK 11
-        uses: actions/setup-java@v3
+        uses: actions/setup-java@v4
         with:
           java-version: '11'
           distribution: 'adopt'
2 changes: 1 addition & 1 deletion .github/workflows/publish-js-apidocs.yml
@@ -25,7 +25,7 @@ jobs:
     steps:
       - uses: actions/checkout@v4
       - name: Setup Node.js
-        uses: actions/setup-node@v3
+        uses: actions/setup-node@v4
         with:
           node-version: 18
       - name: Generate JS docs
2 changes: 1 addition & 1 deletion .github/workflows/windows.yml
@@ -26,7 +26,7 @@ jobs:
         python-version: '3.11.x'
         architecture: 'x64'

-      - uses: actions/setup-node@v3
+      - uses: actions/setup-node@v4
         with:
           node-version: 18

12 changes: 12 additions & 0 deletions cmake/CMakeLists.txt
@@ -87,6 +87,7 @@ option(onnxruntime_USE_QNN "Build with QNN support" OFF)
option(onnxruntime_USE_SNPE "Build with SNPE support" OFF)
option(onnxruntime_USE_RKNPU "Build with RKNPU support" OFF)
option(onnxruntime_USE_DNNL "Build with DNNL support" OFF)
+option(onnxruntime_USE_JBLAS "Build MLAS with JBLAS support" ON)
option(onnxruntime_USE_JSEP "Build with JavaScript implemented kernels support" OFF)
option(onnxruntime_BUILD_UNIT_TESTS "Build ONNXRuntime unit tests" ON)
option(onnxruntime_BUILD_CSHARP "Build C# library" OFF)
@@ -1166,6 +1167,17 @@ if (onnxruntime_USE_DNNL)
add_compile_definitions(DNNL_OPENMP)
endif()

+set(USE_JBLAS FALSE)
+if (onnxruntime_USE_JBLAS AND NOT onnxruntime_MINIMAL_BUILD)
+  if ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "GNU" AND onnxruntime_target_platform STREQUAL "x86_64")
+    add_compile_definitions(MLAS_JBLAS)
+    set(USE_JBLAS TRUE)
+  elseif ("${CMAKE_CXX_COMPILER_ID}" STREQUAL "MSVC" AND onnxruntime_target_platform STREQUAL "x64")
+    add_compile_definitions(MLAS_JBLAS)
+    set(USE_JBLAS TRUE)
+  endif()
+endif()

# TVM EP
if (onnxruntime_USE_TVM)
if (NOT TARGET tvm)
16 changes: 14 additions & 2 deletions cmake/onnxruntime_mlas.cmake
@@ -45,6 +45,15 @@ endif()

set(ONNXRUNTIME_MLAS_LIBS onnxruntime_mlas)

+function(add_jblas)
+  add_subdirectory(${MLAS_SRC_DIR}/x86_64/jblas jblas)
+  target_link_libraries(onnxruntime_mlas PRIVATE jblas::jblas)
+  target_sources(onnxruntime_mlas PRIVATE
+    ${MLAS_SRC_DIR}/jblas_gemm.cpp
+  )
+  set_target_properties(${target_name} PROPERTIES COMPILE_WARNING_AS_ERROR OFF)
+endfunction()

#TODO: set MASM flags properly
function(setup_mlas_source_for_windows)

@@ -200,7 +209,6 @@ function(setup_mlas_source_for_windows)
${MLAS_SRC_DIR}/q4gemm_avx512.cpp
)
endif()

else()
target_sources(onnxruntime_mlas PRIVATE
${MLAS_SRC_DIR}/qgemm_kernel_sse.cpp
@@ -566,7 +574,7 @@ else()
)
set_source_files_properties(${MLAS_SRC_DIR}/qgemm_kernel_amx.cpp PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
set_source_files_properties(${MLAS_SRC_DIR}/x86_64/QgemmU8S8KernelAmx.S PROPERTIES COMPILE_FLAGS "-mavx2 -mavx512bw -mavx512dq -mavx512vl -mavx512f")
endif()
endif()

if(ONNXRUNTIME_MLAS_MULTI_ARCH)
onnxruntime_add_static_library(onnxruntime_mlas_x86_64 ${mlas_platform_srcs})
@@ -604,6 +612,10 @@ else()
target_sources(onnxruntime_mlas PRIVATE ${mlas_platform_srcs})
endif()

+if(USE_JBLAS)
+  add_jblas()
+endif()

foreach(mlas_target ${ONNXRUNTIME_MLAS_LIBS})
target_include_directories(${mlas_target} PRIVATE ${ONNXRUNTIME_ROOT}/core/mlas/inc ${MLAS_SRC_DIR})
onnxruntime_add_include_to_target(${mlas_target} ${GSL_TARGET})
2 changes: 2 additions & 0 deletions docs/ContribOperators.md
@@ -2824,6 +2824,8 @@ This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
<dd>size of each input feature</dd>
<dt><tt>N</tt> : int (required)</dt>
<dd>size of each output feature</dd>
+<dt><tt>accuracy_level</tt> : int</dt>
+<dd>The minimum accuracy level of input A, can be: 0(unset), 1(fp32), 2(fp16), 3(bf16), or 4(int8) (default unset). It is used to control how input A is quantized or downcast internally while doing computation, for example: 0 means input A will not be quantized or downcast while doing computation. 4 means input A can be quantized with the same block_size to int8 internally from type T1.</dd>
<dt><tt>bits</tt> : int (required)</dt>
<dd>number of bits used for weight quantization (default 4)</dd>
<dt><tt>block_size</tt> : int (required)</dt>
3 changes: 2 additions & 1 deletion docs/OperatorKernels.md
@@ -80,7 +80,8 @@ Do not modify directly.*
|Crop|*in* input:**T**<br> *out* output:**T**|1+|**T** = tensor(float)|
|CumSum|*in* x:**T**<br> *in* axis:**T2**<br> *out* y:**T**|14+|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
|||[11, 13]|**T** = tensor(double), tensor(float), tensor(int32), tensor(int64)<br/> **T2** = tensor(int32), tensor(int64)|
-|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *in* axis:**tensor(int64)**<br> *out* output:**T1**<br><br>or<br><br>*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|17+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
+|DFT|*in* input:**T1**<br> *in* dft_length:**T2**<br> *in* axis:**tensor(int64)**<br> *out* output:**T1**<br><br>or<br><br>*in* input:**T1**<br> *in* dft_length:**T2**<br> *out* output:**T1**|20+|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
+|||[17, 19]|**T1** = tensor(double), tensor(float)<br/> **T2** = tensor(int32), tensor(int64)|
|DepthToSpace|*in* input:**T**<br> *out* output:**T**|13+|**T** = tensor(double), tensor(float)|
|||[11, 12]|**T** = tensor(double), tensor(float)|
|||[1, 10]|**T** = tensor(double), tensor(float)|
6 changes: 0 additions & 6 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3593,17 +3593,11 @@ struct OrtApi {
*
* QNN supported keys:
* "backend_path": file path to QNN backend library.
* "qnn_context_cache_enable": 1 to enable QNN graph creation from cached QNN context file. If it's enabled: QNN EP will
* load from cached QNN context binary if it exist. It will generate a context binary file if it's not exist
* "qnn_context_cache_path": explicitly provide the QNN context cache file. Default to model_file.onnx.bin if not provided.
* "profiling_level": QNN profiling level, options: "off", "basic", "detailed". Default to off.
* "rpc_control_latency": QNN RPC control latency.
* "vtcm_mb": QNN VTCM size in MB. default to 0(not set).
* "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
* "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
* "qnn_context_embed_mode", 1 means dump the QNN context binary into node attribute EPContext->ep_cache_context in the ONNX skeleton model.
* 0 means dump the QNN context binary into separate bin file and set the path to EPContext->ep_cache_context.
* The path is relative path to the ONNX skeleton model file.
* "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
* dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
* may alter model/EP partitioning. Use only for debugging.
15 changes: 15 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_session_options_config_keys.h
@@ -235,3 +235,18 @@ static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersFileName =
// Use this config to control the minimum size of the initializer when externalizing it during serialization
static const char* const kOrtSessionOptionsOptimizedModelExternalInitializersMinSizeInBytes =
"session.optimized_model_external_initializers_min_size_in_bytes";

+// Enable EP context feature to dump the partitioned graph which include the EP context into Onnx file.
+// The dumped Onnx model with EP context can be used for future inference to avoid the EP graph partitioning/compile overhead.
+// "0": disable. (default)
+// "1": enable.
+static const char* const kOrtSessionOptionEpContextEnable = "ep.context_enable";

+// Specify the file path for the Onnx model which has EP context.
+// Default to original_file_name_ctx.onnx if not specified
+static const char* const kOrtSessionOptionEpContextFilePath = "ep.context_file_path";

+// Flag to specify whether to dump the EP context into the Onnx model.
+// "0": dump the EP context into separate file, keep the file name in the Onnx model.
+// "1": dump the EP context into the Onnx model. (default).
+static const char* const kOrtSessionOptionEpContextEmbedMode = "ep.context_embed_mode";
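
These three keys generalize what was previously QNN-only behavior: the same commit strips the `qnn_context_cache_*` options from the QNN provider documentation in `onnxruntime_c_api.h` above. In the C, C++, and Python APIs they travel through the session-config-entry mechanism. Below is a minimal TypeScript sketch, assuming the `extra` session-option bag in onnxruntime-web is flattened into dotted config keys (`ep.context_enable` and friends) and forwarded to the native session options; the model path is illustrative.

```ts
import * as ort from 'onnxruntime-web';

// Sketch under the assumption stated above: nested `extra` entries become
// dotted session config keys ("ep.context_enable", "ep.context_file_path",
// "ep.context_embed_mode") on the native SessionOptions.
const session = await ort.InferenceSession.create('model.onnx', {
  extra: {
    ep: {
      context_enable: '1',                  // dump the partitioned graph with EP context
      context_file_path: 'model_ctx.onnx',  // defaults to original_file_name_ctx.onnx
      context_embed_mode: '1',              // '1' (default): embed context; '0': separate file
    },
  },
});
```

Re-running inference from the dumped model then skips the EP graph partitioning/compile overhead, per the comment above.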
2 changes: 1 addition & 1 deletion js/common/lib/backend-impl.ts
@@ -82,7 +82,7 @@ export const resolveBackend = async(backendHints: readonly string[]): Promise<Backend> => {
const isInitializing = !!backendInfo.initPromise;
try {
if (!isInitializing) {
-        backendInfo.initPromise = backendInfo.backend.init();
+        backendInfo.initPromise = backendInfo.backend.init(backendName);
}
await backendInfo.initPromise;
backendInfo.initialized = true;
2 changes: 1 addition & 1 deletion js/common/lib/backend.ts
@@ -71,7 +71,7 @@ export interface Backend {
/**
* Initialize the backend asynchronously. Should throw when failed.
*/
-  init(): Promise<void>;
+  init(backendName: string): Promise<void>;

createInferenceSessionHandler(uriOrBuffer: string|Uint8Array, options?: InferenceSession.SessionOptions):
Promise<InferenceSessionHandler>;
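
Since every `Backend` implementation must now accept the resolved name, here is a minimal sketch of a conforming custom backend. The class, its registration name, and its priority are hypothetical; only `registerBackend` and the types shown in this diff are assumed from `onnxruntime-common`.

```ts
import {Backend, InferenceSession, InferenceSessionHandler, registerBackend} from 'onnxruntime-common';

// Hypothetical backend showing the new init(backendName) contract: one
// Backend object can be registered under several names and branch on the
// name it is resolved as.
class MyBackend implements Backend {
  async init(backendName: string): Promise<void> {
    console.log(`initializing as '${backendName}'`);
  }

  async createInferenceSessionHandler(
      uriOrBuffer: string|Uint8Array,
      options?: InferenceSession.SessionOptions): Promise<InferenceSessionHandler> {
    throw new Error('session creation is out of scope for this sketch');
  }
}

registerBackend('my-backend', new MyBackend(), 100);
```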
2 changes: 1 addition & 1 deletion js/node/lib/backend.ts
@@ -20,7 +20,7 @@ class OnnxruntimeSessionHandler implements InferenceSessionHandler {
}

async dispose(): Promise<void> {
-    return Promise.resolve();
+    this.#inferenceSession.dispose();
}

readonly inputNames: string[];
2 changes: 2 additions & 0 deletions js/node/lib/binding.ts
@@ -28,6 +28,8 @@ export declare namespace Binding {
readonly outputNames: string[];

run(feeds: FeedsType, fetches: FetchesType, options: RunOptions): ReturnType;

+  dispose(): void;
}

export interface InferenceSessionConstructor {
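
The `dispose(): void` declaration above is the native surface behind the handler change in `js/node/lib/backend.ts`. A usage sketch from the public API, assuming the session object exposes a `release()` entry point (current onnxruntime-node releases do) that reaches the handler's `dispose()`:

```ts
import * as ort from 'onnxruntime-node';

async function main(): Promise<void> {
  const session = await ort.InferenceSession.create('model.onnx');  // illustrative path
  const input = new ort.Tensor('float32', Float32Array.from([1, 2, 3, 4]), [1, 4]);
  await session.run({input});  // the feed name must match an entry of session.inputNames

  // Assumption: release() forwards to OnnxruntimeSessionHandler.dispose(),
  // which now calls the native dispose() instead of resolving a no-op.
  // Afterwards, run/loadModel/name accessors throw "Session already disposed."
  await session.release();
}

main().catch(console.error);
```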
19 changes: 18 additions & 1 deletion js/node/src/inference_session_wrap.cc
@@ -31,6 +31,7 @@ Napi::Object InferenceSessionWrap::Init(Napi::Env env, Napi::Object exports) {
Napi::Function func = DefineClass(
env, "InferenceSession",
{InstanceMethod("loadModel", &InferenceSessionWrap::LoadModel), InstanceMethod("run", &InferenceSessionWrap::Run),
InstanceMethod("dispose", &InferenceSessionWrap::Dispose),
InstanceAccessor("inputNames", &InferenceSessionWrap::GetInputNames, nullptr, napi_default, nullptr),
InstanceAccessor("outputNames", &InferenceSessionWrap::GetOutputNames, nullptr, napi_default, nullptr)});

@@ -45,14 +46,15 @@ Napi::Object InferenceSessionWrap::Init(Napi::Env env, Napi::Object exports) {
}

InferenceSessionWrap::InferenceSessionWrap(const Napi::CallbackInfo &info)
-    : Napi::ObjectWrap<InferenceSessionWrap>(info), initialized_(false), session_(nullptr),
+    : Napi::ObjectWrap<InferenceSessionWrap>(info), initialized_(false), disposed_(false), session_(nullptr),
defaultRunOptions_(nullptr) {}

Napi::Value InferenceSessionWrap::LoadModel(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
Napi::HandleScope scope(env);

ORT_NAPI_THROW_ERROR_IF(this->initialized_, env, "Model already loaded. Cannot load model multiple times.");
+  ORT_NAPI_THROW_ERROR_IF(this->disposed_, env, "Session already disposed.");

size_t argsLength = info.Length();
ORT_NAPI_THROW_TYPEERROR_IF(argsLength == 0, env, "Expect argument: model file path or buffer.");
@@ -129,6 +131,7 @@ Napi::Value InferenceSessionWrap::LoadModel(const Napi::CallbackInfo &info) {
Napi::Value InferenceSessionWrap::GetInputNames(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
ORT_NAPI_THROW_ERROR_IF(!this->initialized_, env, "Session is not initialized.");
+  ORT_NAPI_THROW_ERROR_IF(this->disposed_, env, "Session already disposed.");

Napi::EscapableHandleScope scope(env);
return scope.Escape(CreateNapiArrayFrom(env, inputNames_));
@@ -137,6 +140,7 @@ Napi::Value InferenceSessionWrap::GetInputNames(const Napi::CallbackInfo &info)
Napi::Value InferenceSessionWrap::GetOutputNames(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
ORT_NAPI_THROW_ERROR_IF(!this->initialized_, env, "Session is not initialized.");
+  ORT_NAPI_THROW_ERROR_IF(this->disposed_, env, "Session already disposed.");

Napi::EscapableHandleScope scope(env);
return scope.Escape(CreateNapiArrayFrom(env, outputNames_));
@@ -145,6 +149,7 @@ Napi::Value InferenceSessionWrap::GetOutputNames(const Napi::CallbackInfo &info)
Napi::Value InferenceSessionWrap::Run(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
ORT_NAPI_THROW_ERROR_IF(!this->initialized_, env, "Session is not initialized.");
+  ORT_NAPI_THROW_ERROR_IF(this->disposed_, env, "Session already disposed.");
ORT_NAPI_THROW_TYPEERROR_IF(info.Length() < 2, env, "Expect argument: inputs(feed) and outputs(fetch).");
ORT_NAPI_THROW_TYPEERROR_IF(!info[0].IsObject() || !info[1].IsObject(), env,
"Expect inputs(feed) and outputs(fetch) to be objects.");
@@ -209,6 +214,18 @@
}
}

+Napi::Value InferenceSessionWrap::Dispose(const Napi::CallbackInfo &info) {
+  Napi::Env env = info.Env();
+  ORT_NAPI_THROW_ERROR_IF(!this->initialized_, env, "Session is not initialized.");
+  ORT_NAPI_THROW_ERROR_IF(this->disposed_, env, "Session already disposed.");
+
+  this->defaultRunOptions_.reset(nullptr);
+  this->session_.reset(nullptr);
+
+  this->disposed_ = true;
+  return env.Undefined();
+}

Napi::Value InferenceSessionWrap::ListSupportedBackends(const Napi::CallbackInfo &info) {
Napi::Env env = info.Env();
Napi::EscapableHandleScope scope(env);
9 changes: 9 additions & 0 deletions js/node/src/inference_session_wrap.h
@@ -55,13 +55,22 @@ class InferenceSessionWrap : public Napi::ObjectWrap<InferenceSessionWrap> {
*/
Napi::Value Run(const Napi::CallbackInfo &info);

+  /**
+   * [sync] dispose the session.
+   * @param nothing
+   * @returns nothing
+   * @throw nothing
+   */
+  Napi::Value Dispose(const Napi::CallbackInfo &info);

// private members

// persistent constructor
static Napi::FunctionReference constructor;

// session objects
bool initialized_;
+  bool disposed_;
std::unique_ptr<Ort::Session> session_;
std::unique_ptr<Ort::RunOptions> defaultRunOptions_;

17 changes: 14 additions & 3 deletions js/web/lib/backend-wasm.ts
@@ -4,7 +4,7 @@
import {cpus} from 'node:os';
import {Backend, env, InferenceSession, InferenceSessionHandler} from 'onnxruntime-common';

-import {initializeWebAssemblyInstance} from './wasm/proxy-wrapper';
+import {initializeOrtEp, initializeWebAssemblyAndOrtRuntime} from './wasm/proxy-wrapper';
import {OnnxruntimeWebAssemblySessionHandler} from './wasm/session-handler-inference';

/**
@@ -33,12 +33,23 @@ export const initializeFlags = (): void => {
};

export class OnnxruntimeWebAssemblyBackend implements Backend {
-  async init(): Promise<void> {
+  /**
+   * This function initializes the WebAssembly backend.
+   *
+   * This function will be called only once for each backend name. It will be called the first time when
+   * `ort.InferenceSession.create()` is called with a registered backend name.
+   *
+   * @param backendName - the registered backend name.
+   */
+  async init(backendName: string): Promise<void> {
// populate wasm flags
initializeFlags();

// init wasm
-    await initializeWebAssemblyInstance();
+    await initializeWebAssemblyAndOrtRuntime();

+    // performe EP specific initialization
+    await initializeOrtEp(backendName);
}
createInferenceSessionHandler(path: string, options?: InferenceSession.SessionOptions):
Promise<InferenceSessionHandler>;
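
The new doc comment pins down the ordering: flags are read from `ort.env` once, the wasm runtime and ORT are booted once, and then the EP-specific step runs for the resolved backend name. A small usage sketch of that contract (model path illustrative):

```ts
import * as ort from 'onnxruntime-web';

// Wasm flags must be set before the first InferenceSession.create() call,
// because initializeFlags() snapshots them during backend init.
ort.env.wasm.numThreads = 4;

// Resolving 'webgpu' triggers init('webgpu'): shared wasm/ORT runtime setup
// first, then the EP-specific step (initializeOrtEp('webgpu')).
const session = await ort.InferenceSession.create('model.onnx', {
  executionProviders: ['webgpu'],
});
```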
2 changes: 1 addition & 1 deletion js/web/lib/index.ts
@@ -21,7 +21,7 @@ if (!BUILD_DEFS.DISABLE_WEBGL) {
if (!BUILD_DEFS.DISABLE_WASM) {
const wasmBackend = BUILD_DEFS.DISABLE_TRAINING ? require('./backend-wasm-inference').wasmBackend :
require('./backend-wasm-training').wasmBackend;
-  if (!BUILD_DEFS.DISABLE_WEBGPU && typeof navigator !== 'undefined' && navigator.gpu) {
+  if (!BUILD_DEFS.DISABLE_WEBGPU) {
registerBackend('webgpu', wasmBackend, 5);
}
registerBackend('cpu', wasmBackend, 10);
12 changes: 1 addition & 11 deletions js/web/lib/wasm/jsep/backend-webgpu.ts
@@ -144,17 +144,7 @@
*/
sessionExternalDataMapping: Map<number, Map<number, [number, GPUBuffer]>> = new Map();

-  async initialize(env: Env): Promise<void> {
-    if (!navigator.gpu) {
-      // WebGPU is not available.
-      throw new Error('WebGpuBackend: WebGPU is not available.');
-    }
-
-    const adapter = await navigator.gpu.requestAdapter();
-    if (!adapter) {
-      throw new Error('WebGpuBackend: Failed to get GPU adapter.');
-    }
-
+  async initialize(env: Env, adapter: GPUAdapter): Promise<void> {
this.env = env;
const requiredFeatures: GPUFeatureName[] = [];
const deviceDescriptor: GPUDeviceDescriptor = {
(The remainder of this file's diff and the other changed files did not load in this view.)
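
One note on the `WebGpuBackend.initialize(env, adapter)` change above: the availability probe and `requestAdapter()` call move out of the backend, which squares with `js/web/lib/index.ts` now registering 'webgpu' unconditionally and leaving the check to init time. A sketch of the caller's side, assuming the imports shown (`Env` from onnxruntime-common) and that the real call site lives in the JSEP init path not included in this view:

```ts
import {Env} from 'onnxruntime-common';
import {WebGpuBackend} from './backend-webgpu';  // path as in this diff

// Hypothetical caller performing the adapter discovery that initialize()
// no longer does itself.
export async function createWebGpuBackend(env: Env): Promise<WebGpuBackend> {
  if (!navigator.gpu) {
    throw new Error('WebGpuBackend: WebGPU is not available.');
  }
  const adapter = await navigator.gpu.requestAdapter();
  if (!adapter) {
    throw new Error('WebGpuBackend: Failed to get GPU adapter.');
  }
  const backend = new WebGpuBackend();
  await backend.initialize(env, adapter);  // adapter is now supplied by the caller
  return backend;
}
```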
