Commit c145e0c

Merge branch 'main' of github.com:microsoft/onnxruntime into abjindal/build_training_packaging_pipeline_cuda12

ajindal1 committed Nov 20, 2023
2 parents 87d9500 + 247ce21
Showing 172 changed files with 16,203 additions and 17,630 deletions.
4 changes: 1 addition & 3 deletions cmake/CMakeLists.txt
@@ -114,9 +114,7 @@ option(onnxruntime_ENABLE_LTO "Enable link time optimization" OFF)
option(onnxruntime_CROSS_COMPILING "Cross compiling onnx runtime" OFF)
option(onnxruntime_GCOV_COVERAGE "Compile with options necessary to run code coverage" OFF)
option(onnxruntime_DONT_VECTORIZE "Do not vectorize operations in Eigen" OFF)

#It's preferred to turn it OFF when onnxruntime is dynamically linked to protobuf, but TensorRT always requires the full version of protobuf.
cmake_dependent_option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF "NOT onnxruntime_USE_TENSORRT" ON)
option(onnxruntime_USE_FULL_PROTOBUF "Link to libprotobuf instead of libprotobuf-lite when this option is ON" OFF)
option(tensorflow_C_PACKAGE_PATH "Path to tensorflow C package installation dir")
option(onnxruntime_ENABLE_LANGUAGE_INTEROP_OPS "Enable operator implemented in language other than cpp" OFF)
option(onnxruntime_DEBUG_NODE_INPUTS_OUTPUTS "Dump debug information about node inputs and outputs when executing the model." OFF)
20 changes: 7 additions & 13 deletions cmake/onnxruntime_python.cmake
@@ -339,9 +339,6 @@ configure_file(${ONNXRUNTIME_ROOT}/python/_pybind_state.py.in
${CMAKE_BINARY_DIR}/onnxruntime/capi/_pybind_state.py)

if (onnxruntime_ENABLE_TRAINING)
file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/python/deprecated/*.py"
)
file(GLOB onnxruntime_python_root_srcs CONFIGURE_DEPENDS
"${ORTTRAINING_SOURCE_DIR}/python/training/*.py"
)
@@ -419,10 +416,6 @@ if (onnxruntime_ENABLE_TRAINING)
"${ORTTRAINING_SOURCE_DIR}/python/training/onnxblock/optim/*"
)
endif()
else()
file(GLOB onnxruntime_python_capi_training_srcs CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/python/training/*.py"
)
endif()

if (onnxruntime_BUILD_UNIT_TESTS)
@@ -443,6 +436,9 @@ if (onnxruntime_BUILD_UNIT_TESTS)
file(GLOB onnxruntime_python_transformers_testdata_whisper CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/python/transformers/test_data/models/whisper/*.onnx"
)
file(GLOB onnxruntime_python_transformers_testdata_conformer CONFIGURE_DEPENDS
"${ONNXRUNTIME_ROOT}/test/python/transformers/test_data/models/conformer/*.onnx"
)
endif()

file(GLOB onnxruntime_python_tools_srcs CONFIGURE_DEPENDS
@@ -556,6 +552,7 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/whisper
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/eager_test
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/conformer
COMMAND ${CMAKE_COMMAND} -E copy
${ONNXRUNTIME_ROOT}/__init__.py
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/
@@ -577,9 +574,6 @@ add_custom_command(
COMMAND ${CMAKE_COMMAND} -E copy_if_different
${CMAKE_BINARY_DIR}/onnxruntime/capi/_pybind_state.py
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_capi_training_srcs}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/training/
COMMAND ${CMAKE_COMMAND} -E copy
$<TARGET_FILE:onnxruntime_pybind11_state>
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/
@@ -711,6 +705,9 @@ if (onnxruntime_BUILD_UNIT_TESTS)
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_testdata_whisper}
$<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/whisper/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_transformers_testdata_conformer}
$<TARGET_FILE_DIR:${build_output_target}>/transformers/test_data/models/conformer/
)
endif()

@@ -750,9 +747,6 @@ if (onnxruntime_ENABLE_TRAINING)
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils/data/
COMMAND ${CMAKE_COMMAND} -E make_directory $<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/utils/hooks/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_capi_training_srcs}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/capi/training/
COMMAND ${CMAKE_COMMAND} -E copy
${onnxruntime_python_root_srcs}
$<TARGET_FILE_DIR:${build_output_target}>/onnxruntime/training/
2 changes: 1 addition & 1 deletion cmake/onnxruntime_unittests.cmake
@@ -906,7 +906,7 @@ if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
endif()
if (CMAKE_SYSTEM_NAME STREQUAL "Emscripten")
set_target_properties(onnxruntime_test_all PROPERTIES LINK_DEPENDS ${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js)
set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
set_target_properties(onnxruntime_test_all PROPERTIES LINK_FLAGS "-s STACK_SIZE=5242880 -s ALLOW_MEMORY_GROWTH=1 -s MAXIMUM_MEMORY=4294967296 --pre-js \"${TEST_SRC_DIR}/wasm/onnxruntime_test_all_adapter.js\" -s \"EXPORTED_RUNTIME_METHODS=['FS']\" --preload-file ${CMAKE_CURRENT_BINARY_DIR}/testdata@/testdata -s EXIT_RUNTIME=1 -s DEMANGLE_SUPPORT=1")
if (onnxruntime_ENABLE_WEBASSEMBLY_THREADS)
set_property(TARGET onnxruntime_test_all APPEND_STRING PROPERTY LINK_FLAGS " -s DEFAULT_PTHREAD_STACK_SIZE=131072 -s PROXY_TO_PTHREAD=1")
endif()
12 changes: 7 additions & 5 deletions docs/ContribOperators.md
@@ -2385,7 +2385,7 @@ This version of the operator has been available since version 1 of the 'com.microsoft' operator set.

Group Query Self/Cross Attention.

Supports different number of heads for q and kv.
Supports different number of heads for q and kv. Only supports causal or local attention.

#### Version

@@ -2396,6 +2398,8 @@ This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
<dl>
<dt><tt>kv_num_heads</tt> : int (required)</dt>
<dd>Number of attention heads for k and v</dd>
<dt><tt>local_window_size</tt> : int</dt>
<dd>left_window_size for local attention (like Mistral). Default value is -1 meaning unused.</dd>
<dt><tt>num_heads</tt> : int (required)</dt>
<dd>Number of attention heads for q</dd>
<dt><tt>scale</tt> : float</dt>
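The new `local_window_size` attribute above enables sliding-window (local) attention with a configurable left window. As an illustrative sketch — not part of this commit — a GroupQueryAttention node carrying the attribute can be assembled with the ONNX helper API; the input and output names below are placeholders, so consult the full operator signature for the real ones:

```python
from onnx import helper

# Hedged sketch: attribute names come from the spec above; the I/O names are
# placeholders, and the head counts are arbitrary illustrative values.
gqa_node = helper.make_node(
    "GroupQueryAttention",
    inputs=["query", "key", "value", "past_key", "past_value",
            "seqlens_k", "total_sequence_length"],  # placeholder input names
    outputs=["output", "present_key", "present_value"],
    domain="com.microsoft",
    num_heads=32,           # attention heads for q
    kv_num_heads=8,         # attention heads for k and v (may differ from num_heads)
    local_window_size=256,  # new: left window for local attention; -1 (default) disables it
)
print(gqa_node)
```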
@@ -2647,8 +2649,8 @@ This version of the operator has been available since version 1 of the 'com.microsoft' operator set.
#### Type Constraints

<dl>
<dt><tt>T1</tt> : tensor(float), tensor(float16)</dt>
<dd>Constrain input and output types to float/half_float tensors.</dd>
<dt><tt>T1</tt> : tensor(float), tensor(float16), tensor(bfloat16)</dt>
<dd>Constrain input and output types to float/half_float/brain_float tensors.</dd>
<dt><tt>T2</tt> : tensor(uint8)</dt>
<dd>Constrain quantized weight types to uint8.</dd>
</dl>
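With bfloat16 added to `T1`, a MatMulBnb4 graph can now declare brain-float activations. A minimal sketch, assuming the attribute set (`K`, `N`, `block_size`, `quant_type`) from the MatMulBnb4 spec and purely illustrative shapes and values:

```python
from onnx import TensorProto, helper

# Hedged sketch: the 4-bit weight "B" and its "absmax" scales would be produced
# offline by a quantizer; here we only declare bfloat16 value infos and the node.
a_info = helper.make_tensor_value_info("A", TensorProto.BFLOAT16, ["M", 4096])
y_info = helper.make_tensor_value_info("Y", TensorProto.BFLOAT16, ["M", 4096])
node = helper.make_node(
    "MatMulBnb4", ["A", "B", "absmax"], ["Y"], domain="com.microsoft",
    K=4096, N=4096, block_size=64, quant_type=1,  # assumed illustrative values
)
```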
@@ -5021,7 +5023,7 @@ This version of the operator has been available since version 1 of the 'com.microsoft' operator set.

<dl>
<dt><tt>input</tt> : T</dt>
<dd>3D tensor with shape (batch_size, sequence_length, hidden_size)</dd>
<dd>3D tensor with shape (batch_size, sequence_length, hidden_size) or 4D with shape (batch_size, num_heads, sequence_length, head_size)</dd>
<dt><tt>position_ids</tt> : M</dt>
<dd>1D tensor with shape (1) or 2D tensor with shape (batch_size, sequence_length)</dd>
<dt><tt>cos_cache</tt> : T</dt>
@@ -5034,7 +5036,7 @@ This version of the operator has been available since version 1 of the 'com.microsoft' operator set.

<dl>
<dt><tt>output</tt> : T</dt>
<dd>3D tensor with shape (batch_size, sequence_length, hidden_size)</dd>
<dd>tensor with same shape as input.</dd>
</dl>

#### Type Constraints
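The RotaryEmbedding change above relaxes the input from strictly 3D to 3D or 4D, with the output matching the input shape. A reference sketch of that shape behavior in NumPy — my own illustration, not the kernel, assuming the half-split (non-interleaved) rotation layout, 2D `position_ids`, and caches of shape (max_seq, rotary_dim/2):

```python
import numpy as np

def rotary_embedding(x, cos_cache, sin_cache, position_ids):
    # x: (batch, seq, hidden) or (batch, num_heads, seq, head_size)
    squeeze = (x.ndim == 3)
    if squeeze:
        b, s, h = x.shape
        x = x.reshape(b, 1, s, h)                   # fold the 3D case into the 4D path
    cos = cos_cache[position_ids][:, None, :, :]    # -> (batch, 1, seq, rot_dim/2)
    sin = sin_cache[position_ids][:, None, :, :]
    rot = 2 * cos.shape[-1]
    x_rot, x_pass = x[..., :rot], x[..., rot:]      # only the first rot dims are rotated
    x1, x2 = x_rot[..., : rot // 2], x_rot[..., rot // 2:]  # half-split layout
    rotated = np.concatenate((x1 * cos - x2 * sin, x2 * cos + x1 * sin), axis=-1)
    out = np.concatenate((rotated, x_pass), axis=-1)
    return out.reshape(b, s, h) if squeeze else out  # same shape as input

# Shape check for the 3D form: batch=2, seq=5, hidden=64, rotary_dim=32.
pos = np.tile(np.arange(5), (2, 1))
cos_cache = np.cos(np.random.rand(5, 16))
sin_cache = np.sin(np.random.rand(5, 16))
x3 = np.random.rand(2, 5, 64)
assert rotary_embedding(x3, cos_cache, sin_cache, pos).shape == x3.shape
```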
2 changes: 1 addition & 1 deletion docs/OperatorKernels.md
@@ -840,7 +840,7 @@ Do not modify directly.*
|Inverse|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
|Irfft|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(double), tensor(float), tensor(float16)|
|LongformerAttention|*in* input:**T**<br> *in* weight:**T**<br> *in* bias:**T**<br> *in* mask:**T**<br> *in* global_weight:**T**<br> *in* global_bias:**T**<br> *in* global:**G**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
|MatMulBnb4|*in* A:**T1**<br> *in* B:**T2**<br> *in* absmax:**T1**<br> *out* Y:**T1**|1+|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(uint8)|
|MatMulBnb4|*in* A:**T1**<br> *in* B:**T2**<br> *in* absmax:**T1**<br> *out* Y:**T1**|1+|**T1** = tensor(bfloat16), tensor(float), tensor(float16)<br/> **T2** = tensor(uint8)|
|MatMulNBits|*in* A:**T1**<br> *in* B:**T2**<br> *in* scales:**T1**<br> *in* zero_points:**T2**<br> *out* Y:**T1**|1+|**T1** = tensor(float), tensor(float16)<br/> **T2** = tensor(uint8)|
|MoE|*in* input:**T**<br> *in* router_probs:**T**<br> *in* fc1_experts_weights:**T**<br> *in* fc2_experts_weights:**T**<br> *in* fc1_experts_bias:**T**<br> *in* fc2_experts_bias:**T**<br> *out* output:**T**|1+|**T** = tensor(float), tensor(float16)|
|MultiHeadAttention|*in* query:**T**<br> *in* key:**T**<br> *in* value:**T**<br> *in* bias:**T**<br> *in* key_padding_mask:**M**<br> *in* relative_position_bias:**T**<br> *in* past_key:**T**<br> *in* past_value:**T**<br> *out* output:**T**<br> *out* present_key:**T**<br> *out* present_value:**T**|1+|**T** = tensor(float), tensor(float16)|
47 changes: 30 additions & 17 deletions include/onnxruntime/core/session/onnxruntime_lite_custom_op.h
@@ -399,6 +399,15 @@ struct TensorArray : public ArgBase {

using Variadic = TensorArray;

/*
Note:
OrtLiteCustomOp inherits from OrtCustomOp to bridge between a custom func/struct and the ORT core.
The lifetime of an OrtLiteCustomOp instance is managed by customer code, not ORT, so:
1. DO NOT cast OrtLiteCustomOp to OrtCustomOp and release it, since there is no virtual destructor in the hierarchy.
2. OrtLiteCustomFunc and OrtLiteCustomStruct, as two sub-structs, can be released as OrtLiteCustomOp since all members are kept in the OrtLiteCustomOp,
   hence memory can still be reclaimed properly.
Further, OrtCustomOp is a C struct bearing no v-table, so derived structs must by design declare zero virtual functions to keep such casts safe.
*/
struct OrtLiteCustomOp : public OrtCustomOp {
using ConstOptionalFloatTensor = std::optional<const Custom::Tensor<float>&>;
using OptionalFloatTensor = std::optional<Custom::Tensor<float>>;
@@ -774,10 +783,13 @@ struct OrtLiteCustomOp : public OrtCustomOp {

OrtLiteCustomOp(const char* op_name,
const char* execution_provider,
int start_ver = 1, int end_ver = MAX_CUSTOM_OP_END_VER) : op_name_(op_name),
execution_provider_(execution_provider),
start_ver_(start_ver),
end_ver_(end_ver) {
ShapeInferFn shape_infer_fn,
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : op_name_(op_name),
execution_provider_(execution_provider),
shape_infer_fn_(shape_infer_fn),
start_ver_(start_ver),
end_ver_(end_ver) {
OrtCustomOp::version = ORT_API_VERSION;

OrtCustomOp::GetName = [](const OrtCustomOp* op) { return static_cast<const OrtLiteCustomOp*>(op)->op_name_.c_str(); };
@@ -858,8 +870,13 @@ struct OrtLiteCustomOp : public OrtCustomOp {
std::vector<ONNXTensorElementDataType> input_types_;
std::vector<ONNXTensorElementDataType> output_types_;

ShapeInferFn shape_infer_fn_ = {};

int start_ver_ = 1;
int end_ver_ = MAX_CUSTOM_OP_END_VER;

void* compute_fn_ = {};
void* compute_fn_return_status_ = {};
};

//////////////////////////// OrtLiteCustomFunc ////////////////////////////////
@@ -891,9 +908,8 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
ComputeFn compute_fn,
ShapeInferFn shape_infer_fn = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver),
compute_fn_(compute_fn),
shape_infer_fn_(shape_infer_fn) {
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, shape_infer_fn, start_ver, end_ver) {
compute_fn_ = reinterpret_cast<void*>(compute_fn);
ParseArgs<Args...>(input_types_, output_types_);

OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) {
@@ -905,7 +921,8 @@

OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
auto kernel = std::make_unique<Kernel>();
kernel->compute_fn_ = static_cast<const MyType*>(this_)->compute_fn_;
auto me = static_cast<const MyType*>(this_);
kernel->compute_fn_ = reinterpret_cast<ComputeFn>(me->compute_fn_);
Ort::ThrowOnError(ort_api->KernelInfo_GetInputCount(info, &kernel->num_input_));
Ort::ThrowOnError(ort_api->KernelInfo_GetOutputCount(info, &kernel->num_output_));
auto self = static_cast<const OrtLiteCustomFunc*>(this_);
@@ -931,9 +948,8 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
ComputeFnReturnStatus compute_fn_return_status,
ShapeInferFn shape_infer_fn = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver),
compute_fn_return_status_(compute_fn_return_status),
shape_infer_fn_(shape_infer_fn) {
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, shape_infer_fn, start_ver, end_ver) {
compute_fn_return_status_ = reinterpret_cast<void*>(compute_fn_return_status);
ParseArgs<Args...>(input_types_, output_types_);

OrtCustomOp::KernelComputeV2 = [](void* op_kernel, OrtKernelContext* context) -> OrtStatusPtr {
@@ -945,7 +961,8 @@

OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
auto kernel = std::make_unique<Kernel>();
kernel->compute_fn_return_status_ = static_cast<const MyType*>(this_)->compute_fn_return_status_;
auto me = static_cast<const MyType*>(this_);
kernel->compute_fn_return_status_ = reinterpret_cast<ComputeFnReturnStatus>(me->compute_fn_return_status_);
Ort::ThrowOnError(ort_api->KernelInfo_GetInputCount(info, &kernel->num_input_));
Ort::ThrowOnError(ort_api->KernelInfo_GetOutputCount(info, &kernel->num_output_));
auto self = static_cast<const OrtLiteCustomFunc*>(this_);
@@ -965,10 +982,6 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
};
}
}

ComputeFn compute_fn_ = {};
ComputeFnReturnStatus compute_fn_return_status_ = {};
ShapeInferFn shape_infer_fn_ = {};
}; // struct OrtLiteCustomFunc

/////////////////////////// OrtLiteCustomStruct ///////////////////////////
@@ -1007,7 +1020,7 @@ struct OrtLiteCustomStruct : public OrtLiteCustomOp {
OrtLiteCustomStruct(const char* op_name,
const char* execution_provider,
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver) {
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, {}, start_ver, end_ver) {
SetCompute(&CustomOp::Compute);

OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
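The constructor refactor above moves `shape_infer_fn_` and the type-erased compute-function pointers into the `OrtLiteCustomOp` base, so `OrtLiteCustomFunc` and `OrtLiteCustomStruct` no longer carry their own copies and an instance can be released through the base type as the new note describes. Consumption from Python is unchanged; a minimal sketch, with hypothetical library and model names:

```python
import onnxruntime as ort

so = ort.SessionOptions()
# "libmy_custom_ops.so" is a hypothetical library built against the header above.
so.register_custom_ops_library("./libmy_custom_ops.so")
# "model_with_custom_op.onnx" is likewise a placeholder model using that op.
sess = ort.InferenceSession("model_with_custom_op.onnx", sess_options=so)
```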
70 changes: 60 additions & 10 deletions js/.eslintrc.js
@@ -5,10 +5,18 @@

module.exports = {
root: true,
ignorePatterns: ['**/*.js', 'ort-schema/', 'common/test/type-tests/', 'test/data/', 'node_modules/', 'dist/'],
ignorePatterns: [
'**/*.js',
'node_modules/',
'ort-schema/',
'common/test/type-tests/',
'web/types.d.ts',
'test/data/',
'dist/',
],
env: { 'es6': true },
parser: '@typescript-eslint/parser',
parserOptions: { 'project': 'tsconfig.json', 'sourceType': 'module' },
parserOptions: { 'project': true, 'sourceType': 'module' },
plugins: ['@typescript-eslint', 'prefer-arrow', 'header', 'import', 'unicorn', 'jsdoc'],
rules: {
'unicorn/filename-case': 'error',
@@ -144,15 +152,56 @@ module.exports = {
'no-unused-expressions': 'off',
}
}, {
files: ['web/lib/**/*.ts'],
excludedFiles: 'web/lib/wasm/proxy-worker/**/*',
parserOptions: { 'project': 'web/tsconfig.json' },
rules: {
'no-underscore-dangle': 'off',
files: ['web/lib/**/*.ts'], rules: {
'no-underscore-dangle': ['error', {
'allow': [
'_free',
'_malloc',
'_JsepGetNodeName',
'_JsepOutput',
'_OrtAddFreeDimensionOverride',
'_OrtAddRunConfigEntry',
'_OrtAddSessionConfigEntry',
'_OrtAppendExecutionProvider',
'_OrtBindInput',
'_OrtBindOutput',
'_OrtClearBoundOutputs',
'_OrtCreateBinding',
'_OrtCreateRunOptions',
'_OrtCreateSession',
'_OrtCreateSessionOptions',
'_OrtCreateTensor',
'_OrtEndProfiling',
'_OrtFree',
'_OrtGetInputName',
'_OrtGetInputOutputCount',
'_OrtGetLastError',
'_OrtGetOutputName',
'_OrtGetTensorData',
'_OrtInit',
'_OrtReleaseBinding',
'_OrtReleaseRunOptions',
'_OrtReleaseSession',
'_OrtReleaseSessionOptions',
'_OrtReleaseTensor',
'_OrtRun',
'_OrtRunWithBinding',
'_OrtTrainingCopyParametersFromBuffer',
'_OrtTrainingCopyParametersToBuffer',
'_OrtTrainingCreateSession',
'_OrtTrainingEvalStep',
'_OrtTrainingGetModelInputOutputCount',
'_OrtTrainingGetModelInputOutputName',
'_OrtTrainingGetParametersSize',
'_OrtTrainingLazyResetGrad',
'_OrtTrainingLoadCheckpoint',
'_OrtTrainingOptimizerStep',
'_OrtTrainingReleaseCheckpoint',
'_OrtTrainingReleaseSession',
'_OrtTrainingRunTrainStep'
]
}]
}
}, {
files: ['web/lib/wasm/proxy-worker/**/*.ts'],
parserOptions: { 'project': 'web/lib/wasm/proxy-worker/tsconfig.json' },
}, {
files: ['web/lib/onnxjs/**/*.ts'], rules: {
// TODO: those rules are useful. should turn on them in future (webgl refactor)
Expand All @@ -164,6 +213,7 @@ module.exports = {
'import/no-internal-modules': 'off',
'prefer-arrow/prefer-arrow-functions': 'off',
'no-param-reassign': 'off',
'no-underscore-dangle': 'off',
'guard-for-in': 'off'
}
}, {