Merge. Update deps version.
skottmckay committed Nov 2, 2023
2 parents 16522d1 + 8d48d3e commit b49dba1
Showing 153 changed files with 5,982 additions and 1,615 deletions.
12 changes: 1 addition & 11 deletions cgmanifests/generated/cgmanifest.json
@@ -26,7 +26,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "0c296085f9f65f0f8ef7aec7b9eed55faf37dc40",
"commitHash": "b86cc54efce19530fb953e4b21f57e6b3888534c",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "git submodule at cmake/external/onnx"
@@ -192,16 +192,6 @@
"comments": "mp11"
}
},
{
"component": {
"type": "git",
"git": {
"commitHash": "6a20ba82b439ea1fd650da4d389e96b60a1dd828",
"repositoryUrl": "https://github.com/onnx/onnx.git"
},
"comments": "onnx"
}
},
{
"component": {
"type": "git",
18 changes: 0 additions & 18 deletions cmake/CMakeLists.txt
@@ -1282,14 +1282,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_CONFIG_CPU_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP16)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_U8)
add_definitions(-DOPENVINO_CONFIG_VPUX_U8=1)
endif()

if (onnxruntime_USE_OPENVINO_GPU_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_GPU_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
@@ -1310,16 +1302,6 @@ if (onnxruntime_USE_OPENVINO)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP32_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP32=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_VPUX_FP16_NP)
add_definitions(-DOPENVINO_CONFIG_VPUX_FP16=1)
add_definitions(-DOPENVINO_DISABLE_GRAPH_PARTITION=1)
endif()

if (onnxruntime_USE_OPENVINO_HETERO)
add_definitions(-DOPENVINO_CONFIG_HETERO=1)
add_definitions(-DDEVICE_NAME="${onnxruntime_USE_OPENVINO_DEVICE}")
2 changes: 1 addition & 1 deletion cmake/deps.txt
@@ -29,7 +29,7 @@ microsoft_gsl;https://github.com/microsoft/GSL/archive/refs/tags/v4.0.0.zip;cf36
microsoft_wil;https://github.com/microsoft/wil/archive/refs/tags/v1.0.230629.1.zip;e4a542a323c070376f7c2d1973d0f7ddbc1d2fa5
mimalloc;https://github.com/microsoft/mimalloc/archive/refs/tags/v2.1.1.zip;d5ee7d34223d0567892db5179849939c8769dc41
mp11;https://github.com/boostorg/mp11/archive/refs/tags/boost-1.82.0.zip;9bc9e01dffb64d9e0773b2e44d2f22c51aace063
onnx;https://github.com/onnx/onnx/archive/6a20ba82b439ea1fd650da4d389e96b60a1dd828.zip;179a22ad4cd67109c60031ae4b6cf2f434d8bd7e
onnx;https://github.com/onnx/onnx/archive/refs/tags/v1.15.0.zip;54c3f960a0541c5d8d3e60c2933e11f5d3688a11
#use the commit that supports all the plugins and TRT 8.6-GA (https://github.com/onnx/onnx-tensorrt/commit/0462dc31ae78f48744b6141ae376df1f96d3f459)
onnx_tensorrt;https://github.com/onnx/onnx-tensorrt/archive/a43ce67187bab219520fd80f21af8bbd4354bc8c.zip;572535aefef477050f86744dfab1fef840198035
protobuf;https://github.com/protocolbuffers/protobuf/archive/refs/tags/v21.12.zip;7cf2733949036c7d52fda017badcab093fe73bfa
2 changes: 1 addition & 1 deletion cmake/external/onnx
1 change: 1 addition & 0 deletions cmake/onnxruntime_providers_cuda.cmake
@@ -40,6 +40,7 @@
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_slice.cc"
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reshape.cc"
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_expand.cc"
"${ONNXRUNTIME_ROOT}/contrib_ops/cuda/collective/distributed_reduce.cc"
)
endif()
# add using ONNXRUNTIME_ROOT so they show up under the 'contrib_ops' folder in Visual Studio
6 changes: 6 additions & 0 deletions cmake/onnxruntime_rocm_hipify.cmake
@@ -94,6 +94,11 @@ set(contrib_ops_excluded_files
"cuda_contrib_kernels.h"
"inverse.cc"
"fused_conv.cc"
"bert/group_query_attention_helper.h"
"bert/group_query_attention.h"
"bert/group_query_attention.cc"
"bert/group_query_attention_impl.h"
"bert/group_query_attention_impl.cu"
)

if (NOT onnxruntime_ENABLE_ATEN)
@@ -109,6 +114,7 @@ if (NOT onnxruntime_USE_NCCL)
list(APPEND contrib_ops_excluded_files "collective/distributed_slice.cc")
list(APPEND contrib_ops_excluded_files "collective/distributed_reshape.cc")
list(APPEND contrib_ops_excluded_files "collective/distributed_expand.cc")
list(APPEND contrib_ops_excluded_files "collective/distributed_reduce.cc")
endif()

set(provider_excluded_files
36 changes: 31 additions & 5 deletions docs/ContribOperators.md
@@ -2422,14 +2422,14 @@ This version of the operator has been available since version 1 of the 'com.micr
<dd>When buffered past_key and past_value are used (present_key uses the same tensor as past_key), past_sequence_length is required (it could be 0). Otherwise, past_sequence_length is inferred from past_key.</dd>
</dl>

#### Outputs (1 - 3)
#### Outputs

<dl>
<dt><tt>output</tt> : T</dt>
<dd>3D output tensor with shape (batch_size, sequence_length, hidden_size)</dd>
<dt><tt>present_key</tt> (optional) : T</dt>
<dt><tt>present_key</tt> : T</dt>
<dd>present state key with support for format BSNH or BNSH. When past_key uses the same tensor as present_key (k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length + kv_sequence_length.</dd>
<dt><tt>present_value</tt> (optional) : T</dt>
<dt><tt>present_value</tt> : T</dt>
<dd>present state value with support for format BSNH or BNSH. When past_value uses the same tensor as present_value (k-v buffer), it is of length max_sequence_length... otherwise of length past_sequence_length + kv_sequence_length.</dd>
</dl>
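
As a hedged illustration of the length rule above (the helper function is hypothetical, not part of the operator's API):

```cpp
#include <cstdint>

// Restates the buffering rule: if past_key/past_value share their tensor with
// present_key/present_value (a k-v buffer), the present tensors are sized to
// max_sequence_length; otherwise they grow to past + new kv length.
int64_t PresentSequenceLength(bool past_shares_present_buffer,
                              int64_t max_sequence_length,
                              int64_t past_sequence_length,
                              int64_t kv_sequence_length) {
  return past_shares_present_buffer ? max_sequence_length
                                    : past_sequence_length + kv_sequence_length;
}
```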

@@ -2580,8 +2580,30 @@ This version of the operator has been available since version 1 of the 'com.micr
block_size is not an arbitrary number: it must be a power of 2 and not smaller than 16 (e.g., 16, 32, 64, 128, ...).
3. Input B's quantization constants or scales are specified by input 'absmax'.

Input B is stored as uint8_t with shape: [(N * K + 1) / 2].
Input absmax is stored in same type as original type of B(float32, float16) with shape like: [(N * K + block_size - 1) / block_size].
Input B is stored as uint8_t with shape: [(N * K + 1) / 2].
Input absmax is stored in same type as original type of B(float32, float16) with shape like: [(N * K + block_size - 1) / block_size].


1. transB=True (the default; mainly used for the forward pass)
Shape of A: [D0, D1, ..., Dn, K]
Shape of dequantized B: [N, K]. This matches how PyTorch defines a linear weight, e.g., [out_features, in_features].

The computation is:
dequant_B = dequant(B, absmax, quant_type, block_size)
transposed_dequant_B = dequant_B^T
output = A @ transposed_dequant_B

Shape of output: [D0, D1, ..., Dn, N]

2. transB=False (mainly used for the backward pass)
Shape of A: [D0, D1, ..., Dn, N]
Shape of dequantized B: [N, K]. This matches how PyTorch defines a linear weight, e.g., [out_features, in_features].

The computation is:
dequant_B = dequant(B, absmax, quant_type, block_size)
output = A @ dequant_B

Shape of output: [D0, D1, ..., Dn, K]
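
As a minimal sketch of the dequantization described above, assuming a generic 16-entry codebook (quant_lut) in place of the real FP4/NF4 tables, and an illustrative nibble packing order:

```cpp
#include <cstddef>
#include <cstdint>
#include <vector>

// Dequantize 4-bit block-quantized B into a row-major [N, K] float matrix.
// Two 4-bit codes are packed per byte; each block of block_size elements
// shares one absmax scale. The packing order shown is an assumption.
std::vector<float> DequantBnb4(const std::vector<uint8_t>& B,     // [(N*K + 1) / 2]
                               const std::vector<float>& absmax,  // [(N*K + block_size - 1) / block_size]
                               const float quant_lut[16],
                               size_t N, size_t K, size_t block_size) {
  std::vector<float> dequant_B(N * K);
  for (size_t i = 0; i < N * K; ++i) {
    const uint8_t byte = B[i / 2];
    const uint8_t code = (i % 2 == 0) ? (byte >> 4) : (byte & 0x0F);
    dequant_B[i] = quant_lut[code] * absmax[i / block_size];
  }
  return dequant_B;  // transB=1: output = A @ dequant_B^T; transB=0: output = A @ dequant_B
}
```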


#### Version
@@ -2599,6 +2621,10 @@ This version of the operator has been available since version 1 of the 'com.micr
<dd>group size used for weight quantization. It must be a power of 2 and not smaller than 16.</dd>
<dt><tt>quant_type</tt> : int (required)</dt>
<dd>quantization data type. 0 for FP4, 1 for NF4.</dd>
<dt><tt>training_mode</tt> : int</dt>
<dd>Indicates whether the op runs in training mode. Defaults to False.</dd>
<dt><tt>transB</tt> : int</dt>
<dd>Whether B should be transposed on its last two dimensions before the multiplication. Defaults to 1.</dd>
</dl>

#### Inputs
2 changes: 1 addition & 1 deletion docs/OperatorKernels.md
@@ -801,7 +801,7 @@ Do not modify directly.*
|||[1, 10]|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)|
|Upsample|*in* X:**T**<br> *in* scales:**tensor(float)**<br> *out* Y:**T**<br><br>or<br><br>*in* X:**T**<br> *out* Y:**T**|9|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(uint8)|
|||[7, 8]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(uint8)|
|Where|*in* condition:**B**<br> *in* X:**T**<br> *in* Y:**T**<br> *out* output:**T**|16+|**B** = tensor(bool)<br/> **T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
|Where|*in* condition:**B**<br> *in* X:**T**<br> *in* Y:**T**<br> *out* output:**T**|16+|**B** = tensor(bool)<br/> **T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
|||[9, 15]|**B** = tensor(bool)<br/> **T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
|Xor|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T1**|7+|**T** = tensor(bool)<br/> **T1** = tensor(bool)|
| |
2 changes: 0 additions & 2 deletions docs/python/ReadMeOV.rst
@@ -7,7 +7,6 @@ OpenVINO™ Execution Provider for ONNX Runtime accelerates inference across man
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs

Installation
------------
@@ -22,7 +21,6 @@ This package supports:
- Intel® CPUs
- Intel® integrated GPUs
- Intel® discrete GPUs
- Intel® integrated VPUs

``pip3 install onnxruntime-openvino``

8 changes: 6 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -611,7 +611,7 @@ typedef struct OrtMIGraphXProviderOptions {
typedef struct OrtOpenVINOProviderOptions {
#ifdef __cplusplus
OrtOpenVINOProviderOptions() : device_type{},
enable_vpu_fast_compile{},
enable_npu_fast_compile{},
device_id{},
num_of_threads{},
cache_dir{},
@@ -624,7 +624,7 @@ typedef struct OrtOpenVINOProviderOptions {
* Valid settings are one of: "CPU_FP32", "CPU_FP16", "GPU_FP32", "GPU_FP16"
*/
const char* device_type;
unsigned char enable_vpu_fast_compile; ///< 0 = disabled, nonzero = enabled
unsigned char enable_npu_fast_compile; ///< 0 = disabled, nonzero = enabled
const char* device_id;
size_t num_of_threads; ///< 0 = Use default number of threads
const char* cache_dir; // path is set to empty by default
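
A hedged usage sketch of the renamed flag, using the standard C++ session-options helper (values are illustrative):

```cpp
#include "onnxruntime_cxx_api.h"

void ConfigureOpenVINO(Ort::SessionOptions& session_options) {
  OrtOpenVINOProviderOptions ov_options{};
  ov_options.device_type = "CPU_FP32";     // one of the settings listed above
  ov_options.enable_npu_fast_compile = 1;  // formerly enable_vpu_fast_compile
  ov_options.num_of_threads = 0;           // 0 = use the default thread count
  session_options.AppendExecutionProvider_OpenVINO(ov_options);
}
```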
@@ -4605,6 +4605,10 @@ struct OrtCustomOp {
OrtStatusPtr(ORT_API_CALL* KernelComputeV2)(_In_ void* op_kernel, _In_ OrtKernelContext* context);

OrtStatusPtr(ORT_API_CALL* InferOutputShapeFn)(_In_ const struct OrtCustomOp* op, _In_ OrtShapeInferContext*);

// Get the opset version range (start/end) supported by this custom op
int(ORT_API_CALL* GetStartVersion)(_In_ const struct OrtCustomOp* op);
int(ORT_API_CALL* GetEndVersion)(_In_ const struct OrtCustomOp* op);
};

/*
13 changes: 13 additions & 0 deletions include/onnxruntime/core/session/onnxruntime_cxx_api.h
@@ -2228,6 +2228,8 @@ struct ShapeInferContext {

using ShapeInferFn = Ort::Status (*)(Ort::ShapeInferContext&);

#define MAX_CUSTOM_OP_END_VER (1UL << 31) - 1

template <typename TOp, typename TKernel, bool WithStatus = false>
struct CustomOpBase : OrtCustomOp {
CustomOpBase() {
@@ -2280,6 +2282,14 @@ struct CustomOpBase : OrtCustomOp {
}

SetShapeInferFn<TOp>(0);

OrtCustomOp::GetStartVersion = [](const OrtCustomOp* this_) {
return static_cast<const TOp*>(this_)->start_ver_;
};

OrtCustomOp::GetEndVersion = [](const OrtCustomOp* this_) {
return static_cast<const TOp*>(this_)->end_ver_;
};
}

// Default implementation of GetExecutionProviderType that returns nullptr to default to the CPU provider
@@ -2348,6 +2358,9 @@ struct CustomOpBase : OrtCustomOp {
protected:
// Helper function that returns a map of session config entries specified by CustomOpBase::GetSessionConfigKeys.
void GetSessionConfigs(std::unordered_map<std::string, std::string>& out, ConstSessionOptions options) const;

int start_ver_ = 1;
int end_ver_ = MAX_CUSTOM_OP_END_VER;
};

} // namespace Ort
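
A sketch of the new version-range fields in use; MyKernel and MyVersionedOp are placeholder names, and everything beyond start_ver_/end_ver_ follows the usual CustomOpBase CRTP pattern:

```cpp
#include "onnxruntime_cxx_api.h"

struct MyKernel {
  void Compute(OrtKernelContext* /*context*/) { /* kernel body elided */ }
};

struct MyVersionedOp : Ort::CustomOpBase<MyVersionedOp, MyKernel> {
  MyVersionedOp() {
    start_ver_ = 2;  // first opset version this implementation handles
    end_ver_ = 5;    // last one; defaults to MAX_CUSTOM_OP_END_VER when unset
  }
  const char* GetName() const { return "MyVersionedOp"; }
  void* CreateKernel(const OrtApi& /*api*/, const OrtKernelInfo* /*info*/) const {
    return new MyKernel();
  }
  size_t GetInputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetInputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }
  size_t GetOutputTypeCount() const { return 1; }
  ONNXTensorElementDataType GetOutputType(size_t /*index*/) const {
    return ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT;
  }
};
```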
59 changes: 43 additions & 16 deletions include/onnxruntime/core/session/onnxruntime_lite_custom_op.h
@@ -773,8 +773,11 @@ struct OrtLiteCustomOp : public OrtCustomOp {
PARSE_ARGS(Ort::Float8E5M2FNUZ_t, ONNX_TENSOR_ELEMENT_DATA_TYPE_FLOAT8E5M2FNUZ)

OrtLiteCustomOp(const char* op_name,
const char* execution_provider) : op_name_(op_name),
execution_provider_(execution_provider) {
const char* execution_provider,
int start_ver = 1, int end_ver = MAX_CUSTOM_OP_END_VER) : op_name_(op_name),
execution_provider_(execution_provider),
start_ver_(start_ver),
end_ver_(end_ver) {
OrtCustomOp::version = ORT_API_VERSION;

OrtCustomOp::GetName = [](const OrtCustomOp* op) { return static_cast<const OrtLiteCustomOp*>(op)->op_name_.c_str(); };
@@ -837,13 +840,26 @@ struct OrtLiteCustomOp : public OrtCustomOp {
OrtCustomOp::KernelCompute = {};

OrtCustomOp::InferOutputShapeFn = {};

OrtCustomOp::GetStartVersion = [](const OrtCustomOp* op) {
auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
return self->start_ver_;
};

OrtCustomOp::GetEndVersion = [](const OrtCustomOp* op) {
auto self = reinterpret_cast<const OrtLiteCustomOp*>(op);
return self->end_ver_;
};
}

const std::string op_name_;
const std::string execution_provider_;

std::vector<ONNXTensorElementDataType> input_types_;
std::vector<ONNXTensorElementDataType> output_types_;

int start_ver_ = 1;
int end_ver_ = MAX_CUSTOM_OP_END_VER;
};

//////////////////////////// OrtLiteCustomFunc ////////////////////////////////
@@ -873,9 +889,11 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
OrtLiteCustomFunc(const char* op_name,
const char* execution_provider,
ComputeFn compute_fn,
ShapeInferFn shape_infer_fn = {}) : OrtLiteCustomOp(op_name, execution_provider),
compute_fn_(compute_fn),
shape_infer_fn_(shape_infer_fn) {
ShapeInferFn shape_infer_fn = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver),
compute_fn_(compute_fn),
shape_infer_fn_(shape_infer_fn) {
ParseArgs<Args...>(input_types_, output_types_);

OrtCustomOp::KernelCompute = [](void* op_kernel, OrtKernelContext* context) {
@@ -911,9 +929,11 @@ struct OrtLiteCustomFunc : public OrtLiteCustomOp {
OrtLiteCustomFunc(const char* op_name,
const char* execution_provider,
ComputeFnReturnStatus compute_fn_return_status,
ShapeInferFn shape_infer_fn = {}) : OrtLiteCustomOp(op_name, execution_provider),
compute_fn_return_status_(compute_fn_return_status),
shape_infer_fn_(shape_infer_fn) {
ShapeInferFn shape_infer_fn = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver),
compute_fn_return_status_(compute_fn_return_status),
shape_infer_fn_(shape_infer_fn) {
ParseArgs<Args...>(input_types_, output_types_);

OrtCustomOp::KernelComputeV2 = [](void* op_kernel, OrtKernelContext* context) -> OrtStatusPtr {
@@ -985,8 +1005,9 @@ struct OrtLiteCustomStruct : public OrtLiteCustomOp {
};

OrtLiteCustomStruct(const char* op_name,
const char* execution_provider) : OrtLiteCustomOp(op_name,
execution_provider) {
const char* execution_provider,
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) : OrtLiteCustomOp(op_name, execution_provider, start_ver, end_ver) {
SetCompute(&CustomOp::Compute);

OrtCustomOp::CreateKernel = [](const OrtCustomOp* this_, const OrtApi* ort_api, const OrtKernelInfo* info) {
@@ -1049,25 +1070,31 @@ template <typename... Args>
OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
const char* execution_provider,
void (*custom_compute_fn)(Args...),
Status (*shape_infer_fn)(ShapeInferContext&) = {}) {
Status (*shape_infer_fn)(ShapeInferContext&) = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) {
using LiteOp = OrtLiteCustomFunc<Args...>;
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn, shape_infer_fn).release();
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn, shape_infer_fn, start_ver, end_ver).release();
}

template <typename... Args>
OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
const char* execution_provider,
Status (*custom_compute_fn_v2)(Args...),
Status (*shape_infer_fn)(ShapeInferContext&) = {}) {
Status (*shape_infer_fn)(ShapeInferContext&) = {},
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) {
using LiteOp = OrtLiteCustomFunc<Args...>;
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn_v2, shape_infer_fn).release();
return std::make_unique<LiteOp>(op_name, execution_provider, custom_compute_fn_v2, shape_infer_fn, start_ver, end_ver).release();
}

template <typename CustomOp>
OrtLiteCustomOp* CreateLiteCustomOp(const char* op_name,
const char* execution_provider) {
const char* execution_provider,
int start_ver = 1,
int end_ver = MAX_CUSTOM_OP_END_VER) {
using LiteOp = OrtLiteCustomStruct<CustomOp>;
return std::make_unique<LiteOp>(op_name, execution_provider).release();
return std::make_unique<LiteOp>(op_name, execution_provider, start_ver, end_ver).release();
}

} // namespace Custom
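
And the equivalent on the lite API, passing the new trailing arguments; the op name and compute function are placeholders, with nullptr standing in for the optional shape-inference function:

```cpp
#include <memory>

#include "onnxruntime_lite_custom_op.h"

// Compute function for a no-op example; body elided.
void Identity(const Ort::Custom::Tensor<float>& input,
              Ort::Custom::Tensor<float>& output) {
  (void)input;
  (void)output;
}

void RegisterOps(Ort::CustomOpDomain& domain) {
  // Registered as valid for opset versions 2 through 4.
  static const std::unique_ptr<Ort::Custom::OrtLiteCustomOp> op{
      Ort::Custom::CreateLiteCustomOp("Identity", "CPUExecutionProvider",
                                      Identity, nullptr, 2, 4)};
  domain.Add(op.get());
}
```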
