Skip to content

Commit

Permalink
Merge remote-tracking branch 'origin/main' into create_mlbuffer
Browse files Browse the repository at this point in the history
  • Loading branch information
egalli committed Aug 16, 2024
2 parents 03c59fd + b9f3a5d commit 5744a9c
Show file tree
Hide file tree
Showing 385 changed files with 33,749 additions and 27,485 deletions.
2 changes: 1 addition & 1 deletion .github/workflows/publish-python-apidocs.yml
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ permissions:
jobs:
build:
name: Generate Python API docs
runs-on: ubuntu-latest
runs-on: ["self-hosted", "1ES.Pool=onnxruntime-github-ubuntu-CPU"]
steps:
- uses: actions/checkout@v4
- name: Install tools
Expand Down
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
Expand Up @@ -7,4 +7,4 @@
[submodule "cmake/external/emsdk"]
path = cmake/external/emsdk
url = https://github.com/emscripten-core/emsdk.git
branch = 3.1.59
branch = 3.1.62
1 change: 0 additions & 1 deletion .lintrunner.toml
Original file line number Diff line number Diff line change
Expand Up @@ -127,7 +127,6 @@ include_patterns = [
]
exclude_patterns = [
'java/**', # FIXME: Enable clang-format for java
'js/**',
'onnxruntime/contrib_ops/cuda/bert/tensorrt_fused_multihead_attention/**', # Contains data chunks
'onnxruntime/core/flatbuffers/schema/*.fbs.h', # Generated code
'onnxruntime/test/flatbuffers/*.fbs.h', # Generated code
Expand Down
2 changes: 1 addition & 1 deletion .pipelines/nuget_config/x64/packages.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="python" version="3.9.7" targetFramework="native" />
<package id="Microsoft.AI.DirectML" version="1.15.0" targetFramework="native" />
<package id="Microsoft.AI.DirectML" version="1.15.1" targetFramework="native" />
<package id="Microsoft.Windows.CppWinRT" version="2.0.201201.7" targetFramework="native" />
</packages>
2 changes: 1 addition & 1 deletion .pipelines/nuget_config/x86/packages.config
Original file line number Diff line number Diff line change
@@ -1,6 +1,6 @@
<?xml version="1.0" encoding="utf-8"?>
<packages>
<package id="pythonx86" version="3.9.7" targetFramework="native" />
<package id="Microsoft.AI.DirectML" version="1.15.0" targetFramework="native" />
<package id="Microsoft.AI.DirectML" version="1.15.1" targetFramework="native" />
<package id="Microsoft.Windows.CppWinRT" version="2.0.201201.7" targetFramework="native" />
</packages>
2 changes: 1 addition & 1 deletion cgmanifests/generated/cgmanifest.json
Original file line number Diff line number Diff line change
Expand Up @@ -6,7 +6,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "d52c46520124845b1e0e0525f2759299d840143f",
"commitHash": "0fde04880048f743056bed17cb0543a42e040fae",
"repositoryUrl": "https://github.com/emscripten-core/emsdk.git"
},
"comments": "git submodule at cmake/external/emsdk"
Expand Down
2 changes: 1 addition & 1 deletion cmake/external/dml.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML)
set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config)
set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config)
get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE)
set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.15.0)
set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.15.1)

# Restore nuget packages, which will pull down the DirectML redist package.
add_custom_command(
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
<?xml version="1.0" encoding="utf-8"?>
<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
<ItemGroup Condition="('$(OutputType)'!='Library' OR '$(IsAppExtension)'=='True')">
<NativeReference Include="$(MSBuildThisFileDirectory)..\..\runtimes\ios\native\onnxruntime.xcframework">
<NativeReference Include="$(MSBuildThisFileDirectory)..\..\runtimes\ios\native\onnxruntime.xcframework.zip">
<Kind>Static</Kind>
<IsCxx>True</IsCxx>
<SmartLink>True</SmartLink>
Expand All @@ -10,4 +10,4 @@
<WeakFrameworks>CoreML</WeakFrameworks>
</NativeReference>
</ItemGroup>
</Project>
</Project>
59 changes: 59 additions & 0 deletions docs/ContribOperators.md
Original file line number Diff line number Diff line change
Expand Up @@ -37,6 +37,7 @@ Do not modify directly.*
* <a href="#com.microsoft.FusedMatMul">com.microsoft.FusedMatMul</a>
* <a href="#com.microsoft.FusedMatMulActivation">com.microsoft.FusedMatMulActivation</a>
* <a href="#com.microsoft.GatedRelativePositionBias">com.microsoft.GatedRelativePositionBias</a>
* <a href="#com.microsoft.GatherBlockQuantized">com.microsoft.GatherBlockQuantized</a>
* <a href="#com.microsoft.GatherND">com.microsoft.GatherND</a>
* <a href="#com.microsoft.Gelu">com.microsoft.Gelu</a>
* <a href="#com.microsoft.GemmFastGelu">com.microsoft.GemmFastGelu</a>
Expand Down Expand Up @@ -2030,6 +2031,64 @@ This version of the operator has been available since version 1 of the 'com.micr
</dl>


### <a name="com.microsoft.GatherBlockQuantized"></a><a name="com.microsoft.gatherblockquantized">**com.microsoft.GatherBlockQuantized**</a>

GatherBlockQuantized is a Gather with data quantized. It is similar to Gather (https://github.com/onnx/onnx/blob/main/docs/Operators.md#gather) with differences:
1. Input `data` is a constant. It is quantized block-wise along attribute `quantize_axis` with block size specified by attribute `block_size`.
`block_size must` be a power of 2 and not smaller than 16, like 16, 32, 64, 128, ..
2. Input `data`'s scale and zero point are specified by input `scales` and `zero_points`. `scales` and `zero_points` are also constants.
If `zero_points` is not provided, 0 is the zero point.
3. During the op execution, `data` and `indices` are first used to generate the quantized output. Then, `scales` and `zero_points` are used
to dequantize the output.
4. The `output` and `scales` have the same type. The `data` and `zero_points` have the same type.

#### Version

This version of the operator has been available since version 1 of the 'com.microsoft' operator set.

#### Attributes

<dl>
<dt><tt>block_size</tt> : int</dt>
<dd>(Optional) block size used for weight quantization. It needs to be a power of 2 and not smaller than 16.</dd>
<dt><tt>gather_axis</tt> : int</dt>
<dd>(Optional) Which axis to gather on. Negative value means counting dimensions from the back. Accepted range is [-r, r-1] where r = rank(data).</dd>
<dt><tt>quantize_axis</tt> : int</dt>
<dd>(Optional) Which axis to block-wise quantize. Negative value means counting dimensions from the back. Accepted range is [-r, r-1] where r = rank(data).</dd>
</dl>

#### Inputs (3 - 4)

<dl>
<dt><tt>data</tt> : T1</dt>
<dd>Tensor of rank r >= 1. Block-wise quantized.</dd>
<dt><tt>indices</tt> : Tind</dt>
<dd>Tensor of int32/int64 indices, of any rank q. All index values are expected to be within bounds [-s, s-1] along axis of size s. It is an error if any of the index values are out of bounds.</dd>
<dt><tt>scales</tt> : T2</dt>
<dd>quantization scale</dd>
<dt><tt>zero_points</tt> (optional) : T1</dt>
<dd>quantization zero points</dd>
</dl>

#### Outputs

<dl>
<dt><tt>output</tt> : T2</dt>
<dd>Dequantized output tensor of rank q + (r - 1).</dd>
</dl>

#### Type Constraints

<dl>
<dt><tt>T1</tt> : tensor(int4), tensor(uint4)</dt>
<dd>Constrain quantized types.</dd>
<dt><tt>T2</tt> : tensor(float), tensor(float16), tensor(bfloat16)</dt>
<dd>Constrain dequantized types.</dd>
<dt><tt>Tind</tt> : tensor(int32), tensor(int64)</dt>
<dd>Constrain indices to integer types.</dd>
</dl>


### <a name="com.microsoft.GatherND"></a><a name="com.microsoft.gathernd">**com.microsoft.GatherND**</a>

Given `data` tensor of rank r >= 1, and `indices` tensor of rank q >= 1, gather
Expand Down
1 change: 1 addition & 0 deletions docs/OperatorKernels.md
Original file line number Diff line number Diff line change
Expand Up @@ -477,6 +477,7 @@ Do not modify directly.*
|FusedConv|*in* X:**T**<br> *in* W:**T**<br> *in* B:**T**<br> *in* Z:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|FusedGemm|*in* A:**T**<br> *in* B:**T**<br> *in* C:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|FusedMatMul|*in* A:**T**<br> *in* B:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|GatherBlockQuantized|*in* data:**T1**<br> *in* indices:**Tind**<br> *in* scales:**T2**<br> *in* zero_points:**T1**<br> *out* output:**T2**|1+|**T1** = tensor(int4), tensor(uint4)<br/> **T2** = tensor(float), tensor(float16)<br/> **Tind** = tensor(int32), tensor(int64)|
|GatherND|*in* data:**T**<br> *in* indices:**Tind**<br> *out* output:**T**|1+|**T** = tensor(bfloat16), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(string), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)<br/> **Tind** = tensor(int32), tensor(int64)|
|Gelu|*in* X:**T**<br> *out* Y:**T**|1+|**T** = tensor(float)|
|GreedySearch|*in* input_ids:**I**<br> *in* max_length:**I**<br> *in* min_length:**I**<br> *in* repetition_penalty:**T**<br> *in* vocab_mask:**I**<br> *in* prefix_vocab_mask:**I**<br> *in* attention_mask:**I**<br> *out* sequences:**I**|1+|**T** = tensor(float)|
Expand Down
1 change: 0 additions & 1 deletion docs/python/requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,5 +21,4 @@ onnx
sphinx_exec_code
sphinx_tabs
furo
-f https://download.pytorch.org/whl/torch/
torch
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ struct OrtTensorRTProviderOptionsV2 {
// can be updated using: UpdateTensorRTProviderOptionsWithValue
int trt_max_partition_iterations{1000}; // maximum iterations for TensorRT parser to get capability
int trt_min_subgraph_size{1}; // minimum size of TensorRT subgraphs
size_t trt_max_workspace_size{1 << 30}; // maximum workspace size for TensorRT.
size_t trt_max_workspace_size{0}; // maximum workspace size for TensorRT. Default is 0 means max device memory size
int trt_fp16_enable{0}; // enable TensorRT FP16 precision. Default 0 = false, nonzero = true
int trt_int8_enable{0}; // enable TensorRT INT8 precision. Default 0 = false, nonzero = true
const char* trt_int8_calibration_table_name{nullptr}; // TensorRT INT8 calibration table name.
Expand Down
16 changes: 0 additions & 16 deletions js/.clang-format

This file was deleted.

Loading

0 comments on commit 5744a9c

Please sign in to comment.