Merge branch 'main' into softmax_uniform
axinging committed Nov 9, 2023
2 parents 165ce99 + 55c19d6 commit b52c9dc
Showing 101 changed files with 1,721 additions and 459 deletions.
6 changes: 5 additions & 1 deletion .github/workflows/stale.yml
@@ -17,10 +17,14 @@ jobs:
     with:
       # Comma separated list of labels that can be assigned to issues to exclude them from being marked as stale
       exempt-issue-labels: contributions welcome, feature request, regression
+      # Override exempt-all-assignees but only to exempt the issues with an assignee to be marked as stale automatically
+      exempt-all-issue-assignees: true
       # Used to ignore the issues and pull requests created before the start date
       start-date: 20220419
       # Number of days without activity before the actions/stale action labels an issue
       days-before-issue-stale: 30
       # Number of days without activity before the actions/stale action closes an issue
-      days-before-issue-close: 7
+      days-before-issue-close: 30
       # Label you want to apply to issues that have been inactive for the amount of time specified by days-before-issue-stale
       stale-issue-label: "stale"
       # Comment that you want to add to issues that are labeled by the actions/stale action
2 changes: 1 addition & 1 deletion cgmanifests/cgmanifest.json
@@ -568,7 +568,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "d10b27fe37736d2944630ecd7557cefa95cf87c9",
"commitHash": "e7248b26a1ed53fa030c5c459f7ea095dfd276ac",
"repositoryUrl": "https://gitlab.com/libeigen/eigen.git"
}
}
4 changes: 2 additions & 2 deletions cgmanifests/generated/cgmanifest.json
@@ -136,7 +136,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "003c580e696a774afdc984996ee909b7c8d8128c",
"commitHash": "0da379fc4808f9601faef392352018c741c0f297",
"repositoryUrl": "https://github.com/google/XNNPACK.git"
},
"comments": "googlexnnpack"
@@ -226,7 +226,7 @@
"component": {
"type": "git",
"git": {
"commitHash": "1787867f6183f056420e532eec640cba25efafea",
"commitHash": "4fe0e1e183925bf8cfa6aae24237e724a96479b8",
"repositoryUrl": "https://github.com/Maratyszcza/pthreadpool.git"
},
"comments": "pthreadpool"
9 changes: 7 additions & 2 deletions cmake/deps.txt
@@ -9,14 +9,19 @@
 #since the file contains a version string: "lts_20230802". However, the file is for debugging purposes only and would
 #not affect built binaries.
 #
-# NOTE: You must run deps_update_and_upload.py when ready to test your changes in a CI.
+# NOTE: You must run deps_update_and_upload.py and generate_cgmanifest.py when ready to test your changes in a CI.
 # See https://microsoft.sharepoint.com/teams/ONNX2/_layouts/OneNote.aspx?id=%2Fteams%2FONNX2%2FShared%20Documents%2FNotebooks%2FONNX%20Ecosystem%20Team%20Notebook&wd=target%28Development.one%7C63D3AB47-51D1-4A62-9965-66882234BD44%2FAdd%20or%20update%20a%20dependency%20in%20deps.txt%7C0E9ED71D-89D5-40FA-B05F-C0123289C591%2F%29
 #
 abseil_cpp;https://github.com/abseil/abseil-cpp/archive/refs/tags/20230802.0.zip;04271dfbfac59269b6939e1e9d5faf0d18a7ba91
 cxxopts;https://github.com/jarro2783/cxxopts/archive/3c73d91c0b04e2b59462f0a741be8c07024c1bc0.zip;6c6ca7f8480b26c8d00476e0e24b7184717fe4f0
 date;https://github.com/HowardHinnant/date/archive/refs/tags/v3.0.1.zip;2dac0c81dc54ebdd8f8d073a75c053b04b56e159
 dlpack;https://github.com/dmlc/dlpack/archive/refs/tags/v0.6.zip;4d565dd2e5b31321e5549591d78aa7f377173445
-eigen;https://gitlab.com/libeigen/eigen/-/archive/3.4.0/eigen-3.4.0.zip;ef24286b7ece8737c99fa831b02941843546c081
+# This Eigen commit id matches the eigen archive being consumed from https://gitlab.com/libeigen/eigen/-/archive/3.4/eigen-3.4.zip
+# prior to the 3.4.1 RC changing the bits and invalidating the hash.
+# it contains changes on top of 3.4.0 which are required to fix build issues.
+# Until the 3.4.1 release this is the best option we have.
+# Issue link: https://gitlab.com/libeigen/eigen/-/issues/2744
+eigen;https://gitlab.com/libeigen/eigen/-/archive/e7248b26a1ed53fa030c5c459f7ea095dfd276ac/eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;be8be39fdbc6e60e94fa7870b280707069b5b81a
 flatbuffers;https://github.com/google/flatbuffers/archive/refs/tags/v1.12.0.zip;ba0a75fd12dbef8f6557a74e611b7a3d0c5fe7bf
 fp16;https://github.com/Maratyszcza/FP16/archive/0a92994d729ff76a58f692d3028ca1b64b145d91.zip;b985f6985a05a1c03ff1bb71190f66d8f98a1494
 fxdiv;https://github.com/Maratyszcza/FXdiv/archive/63058eff77e11aa15bf531df5dd34395ec3017c8.zip;a5658f4036402dbca7cebee32be57fb8149811e1
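
The entries above follow a `name;download-url;hash` layout. As an illustration (not a script from this repo), here is a small Python sketch that checks a downloaded archive against its recorded checksum; the 40-hex-digit hash is assumed here to be the SHA-1 of the archive file:

```python
# Hypothetical helper, not part of onnxruntime: verify a cmake/deps.txt entry.
# Assumes the third field is the SHA-1 digest of the downloaded archive.
import hashlib
import urllib.request

def verify_dep(entry: str) -> bool:
    name, url, expected = entry.strip().split(";")
    data = urllib.request.urlopen(url).read()  # download the archive bytes
    actual = hashlib.sha1(data).hexdigest()
    print(f"{name}: expected {expected}, got {actual}")
    return actual == expected

# The new Eigen pin from this diff:
verify_dep(
    "eigen;https://gitlab.com/libeigen/eigen/-/archive/"
    "e7248b26a1ed53fa030c5c459f7ea095dfd276ac/"
    "eigen-e7248b26a1ed53fa030c5c459f7ea095dfd276ac.zip;"
    "be8be39fdbc6e60e94fa7870b280707069b5b81a"
)
```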
50 changes: 38 additions & 12 deletions cmake/external/abseil-cpp.natvis
@@ -25,30 +25,56 @@
   </Type>
   <!-- Should handle both flat hash_set and hash_map -->
   <Type Name="absl::lts_20230802::container_internal::raw_hash_set&lt;*&gt;">
-    <DisplayString Condition="size_ == 0">empty</DisplayString>
-    <DisplayString>{{ size={size_} }}</DisplayString>
+    <Intrinsic Name="_commonfields" Expression="settings_.value"/>
+    <Intrinsic Name="_size" Expression="settings_.value.compressed_tuple_.value"/>
+    <Intrinsic Name="_capacity" Expression="_commonfields().capacity_"/>
+    <Intrinsic Name="_control" Expression="_commonfields().control_"/>
+    <Intrinsic Name="_slots" Expression="(slot_type*)(_commonfields().slots_)"/>
+    <DisplayString Condition="_size() == 0">empty</DisplayString>
+    <DisplayString IncludeView="noparens">size={ _size() }</DisplayString>
+    <DisplayString ExcludeView="noparens">size=({_size()})</DisplayString>
     <Expand>
-      <Item Name="[size]" ExcludeView="simple">size_</Item>
-      <Item Name="[capacity]" ExcludeView="simple">capacity_</Item>
-      <CustomListItems MaxItemsPerView="5000">
+      <Item Name="[Size]">_size()</Item>
+      <Item Name="[Capacity]" ExcludeView="noparens">_capacity()</Item>
+      <CustomListItems MaxItemsPerView="100">
         <Variable Name="nslot" InitialValue="0" />
-        <Size>size_</Size>
+        <Size>_size()</Size>
         <Loop>
           <!-- bool IsFull(ctrl_t c) const { return c >= 0; } -->
-          <If Condition="ctrl_[nslot] &gt;= 0">
-            <Item>slots_[nslot]</Item>
+          <If Condition="_control()[nslot] &gt;= 0">
+            <Item>_slots()[nslot]</Item>
           </If>
           <Exec>nslot++</Exec>
-          <Break Condition="nslot == capacity_" />
+          <Break Condition="nslot == _capacity()" />
         </Loop>
       </CustomListItems>
     </Expand>
   </Type>
+
+  <!-- Primitive types stored as a value -->
+  <Type Name="absl::lts_20230802::container_internal::Storage&lt;*,*,0&gt;">
+    <DisplayString IncludeView="noparens">*($T1 *){value}</DisplayString>
+    <DisplayString ExcludeView="noparens">(*($T1 *){value})</DisplayString>
+    <Expand>
+      <ExpandedItem>*($T1 *){value}</ExpandedItem>
+    </Expand>
+  </Type>
+
+  <!-- For storage inherited from the type -->
+  <Type Name="absl::lts_20230802::container_internal::Storage&lt;*,*,1&gt;">
+    <DisplayString IncludeView="noparens">*($T1 *)this</DisplayString>
+    <DisplayString ExcludeView="noparens">(*($T1 *)this)</DisplayString>
+    <Expand>
+      <ExpandedItem>*($T1 *)this</ExpandedItem>
+    </Expand>
+  </Type>
+
   <Type Name="absl::lts_20230802::container_internal::map_slot_type&lt;*&gt;">
-    <DisplayString>{{ {value.first}:{value.second} }}</DisplayString>
+    <DisplayString IncludeView="noparens">{value.first}, {value.second}</DisplayString>
+    <DisplayString ExcludeView="noparens">({value.first}, {value.second})</DisplayString>
     <Expand>
-      <Item Name="[key]" ExcludeView="simple">value.first</Item>
-      <Item Name="[value]" ExcludeView="simple">value.second</Item>
+      <Item Name="first" ExcludeView="simple">value.first</Item>
+      <Item Name="second" ExcludeView="simple">value.second</Item>
     </Expand>
   </Type>
 </AutoVisualizer>
9 changes: 9 additions & 0 deletions cmake/linux_arm32_crosscompile_toolchain.cmake
@@ -0,0 +1,9 @@
+#This file is just a sample. You may need to modify it before using.
+SET(CMAKE_SYSTEM_NAME Linux)
+SET(CMAKE_SYSTEM_VERSION 1)
+SET(CMAKE_C_COMPILER arm-none-linux-gnueabihf-gcc)
+SET(CMAKE_CXX_COMPILER arm-none-linux-gnueabihf-g++)
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
9 changes: 9 additions & 0 deletions cmake/linux_arm64_crosscompile_toolchain.cmake
@@ -0,0 +1,9 @@
+#This file is just a sample. You may need to modify it before using.
+SET(CMAKE_SYSTEM_NAME Linux)
+SET(CMAKE_SYSTEM_VERSION 1)
+SET(CMAKE_C_COMPILER aarch64-none-linux-gnu-gcc)
+SET(CMAKE_CXX_COMPILER aarch64-none-linux-gnu-g++)
+SET(CMAKE_FIND_ROOT_PATH_MODE_PROGRAM NEVER)
+SET(CMAKE_FIND_ROOT_PATH_MODE_LIBRARY ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_INCLUDE ONLY)
+SET(CMAKE_FIND_ROOT_PATH_MODE_PACKAGE ONLY)
8 changes: 4 additions & 4 deletions docs/Memory_Optimizer.md
@@ -20,10 +20,10 @@ Not all models and recipes need this optimizer technique. Imagine if your traini
 ## Quick trial
 
 1. Make sure ONNX Runtime training wheel is installed and correctly configured.
-2. Integrate models using `ORTModule`, be noted log_level should be equal or lower than INFO.
-   > ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.INFO))
-3. Run the training as usual and redirect all outputs into log file; then stop it after training few steps.
-4. Check the logging file, search "Summary", you could possibly find something like this:
+2. Integrate models using `ORTModule`, be noted log_level should be equal to or lower than DEVINFO.
+   > ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.DEVINFO))
+3. Run the training as usual and redirect all outputs into the log file; then stop it after training a few steps.
+4. Check the logging file, and search "Summary", you could find something like this:
 ```
 MemoryOptimizer Summary:
 User config:
 ...
 ```
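
For reference, a minimal sketch of the quick-trial steps above, assuming an installed onnxruntime-training wheel; `pt_model` and the training loop are placeholders:

```python
# Sketch only: enable DEVINFO logging so the MemoryOptimizer summary is emitted.
from onnxruntime.training.ortmodule import ORTModule, DebugOptions, LogLevel

ort_model = ORTModule(pt_model, DebugOptions(log_level=LogLevel.DEVINFO))
# ... run a few training steps with ort_model, then stop ...
```

Then redirect stdout/stderr to a file when launching training (e.g. `python train.py > train.log 2>&1`) and search the log for "Summary".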
84 changes: 84 additions & 0 deletions docs/ORTModule_Training_Guidelines.md
@@ -49,6 +49,90 @@ More options for **developers**.
```
Check [DebugOptions implementation](../orttraining/orttraining/python/training/ortmodule/options.py) for more details.

#### Log Level Explanations

<table>
<tr>
<th style="width:20%">Log Level</th>
<th style="width:80%">Description</th>
</tr>
<tr>
<td>

`FATAL` | `ERROR` | `WARNING` (For Users)

<sup>`WARNING` is the default and recommended level for
<br>users.</sup>
</td>
<td>

- ONNX Runtime backend log level - `FATAL` | `ERROR` | `WARNING`.
- ORTModule log level - `FATAL` | `ERROR` | `WARNING`.
- Rank-0 log filtering is `ON` (i.e. logging on rank 0 only).
- PyTorch exporter export logs filtering is `ON`.
- PyTorch exporter verbose logs (including tracing graph) filtering is `ON`.

</td>
</tr>
<tr>
<td>

`INFO` (For Users | ORT Developers)

<sup>`INFO` is used for collecting experimental
<br>feature stats, or slightly more detailed error messages.</sup>
</td>
<td>

- ONNX Runtime backend log level - `WARNING`.
- ORTModule log level - `INFO`.
- Rank-0 log filtering is `ON` (i.e. logging on rank 0 only).
- PyTorch exporter export logs filtering is `ON`.
- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.

</td>
</tr>
<tr>
<td>

`DEVINFO` (For ORT Developers)

<sup>`DEVINFO` is the recommended level for
<br>debugging purposes.</sup>
</td>
<td>

- ONNX Runtime backend log level - `INFO`.
- ORTModule log level - `INFO`.
- Rank-0 log filtering is `OFF` (i.e. logging on all ranks).
- PyTorch exporter export logs filtering is `OFF`.
- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.

</td>
</tr>

<tr>
<td>

`VERBOSE` (For ORT Developers)

<sup>`VERBOSE` is the last resort for debugging
<br>hard problems.</sup>
</td>
<td>

- ONNX Runtime backend log level - `VERBOSE`.
- ORTModule log level - `VERBOSE`.
- Rank-0 log filtering is `OFF` (i.e. logging on all ranks).
- PyTorch exporter export logs filtering is `OFF`.
- PyTorch exporter verbose logs (including tracing graph) filtering is `OFF`.

</td>
</tr>

</table>
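
In practice the level is selected through `DebugOptions`, as in this illustrative sketch (imports assume an onnxruntime-training install; `model` is a placeholder `torch.nn.Module`):

```python
from onnxruntime.training.ortmodule import ORTModule, DebugOptions, LogLevel

# Default, user-facing behavior: warnings/errors only, rank-0 filtering ON.
m = ORTModule(model, DebugOptions(log_level=LogLevel.WARNING))

# Deep debugging: all ranks log, and exporter verbose output is kept.
m_dbg = ORTModule(model, DebugOptions(log_level=LogLevel.VERBOSE))
```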


### 2.1 Environment Variables

`ORTModule` provides environment variables targeting different use cases.
2 changes: 1 addition & 1 deletion docs/OperatorKernels.md
@@ -665,7 +665,7 @@ Do not modify directly.*
 |Mul|*in* A:**T**<br> *in* B:**T**<br> *out* C:**T**|14+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||13|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
 |||[7, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint32), tensor(uint64)|
-|Neg|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)|
+|Neg|*in* X:**T**<br> *out* Y:**T**|13+|**T** = tensor(bfloat16), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)|
 |||[6, 12]|**T** = tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8)|
 |NonZero|*in* X:**T**<br> *out* Y:**tensor(int64)**|13+|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
 |||[9, 12]|**T** = tensor(bool), tensor(float), tensor(float16), tensor(int32), tensor(int64), tensor(uint8)|
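
A row in this table can be sanity-checked end to end; here is an illustrative run of a single `Neg` node (opset 13) through onnxruntime. It uses float32 on the CPU EP, since NumPy has no bfloat16; the newly added tensor(bfloat16) support applies to the CUDA EP:

```python
import numpy as np
from onnx import TensorProto, helper
import onnxruntime as ort

node = helper.make_node("Neg", ["X"], ["Y"])
graph = helper.make_graph(
    [node], "neg_check",
    [helper.make_tensor_value_info("X", TensorProto.FLOAT, [3])],
    [helper.make_tensor_value_info("Y", TensorProto.FLOAT, [3])],
)
model = helper.make_model(graph, opset_imports=[helper.make_opsetid("", 13)])
sess = ort.InferenceSession(model.SerializeToString(), providers=["CPUExecutionProvider"])
print(sess.run(None, {"X": np.array([1.0, -2.0, 3.0], dtype=np.float32)})[0])
# -> [-1.  2. -3.]
```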
@@ -278,7 +278,7 @@ class ThreadPoolProfiler {
   int num_threads_;
 #ifdef _MSC_VER
 #pragma warning(push)
-// C4324: structure was padded due to alignment specifier
+  // C4324: structure was padded due to alignment specifier
 #pragma warning(disable : 4324)
 #endif  // _MSC_VER
   struct ORT_ALIGN_TO_AVOID_FALSE_SHARING ChildThreadStat {
10 changes: 8 additions & 2 deletions include/onnxruntime/core/session/onnxruntime_c_api.h
@@ -3598,12 +3598,18 @@ struct OrtApi {
* "rpc_control_latency": QNN RPC control latency.
* "htp_performance_mode": QNN performance mode, options: "burst", "balanced", "default", "high_performance",
* "high_power_saver", "low_balanced", "low_power_saver", "power_saver", "sustained_high_performance". Default to "default".
* "qnn_context_embed_mode", 1 means dump the QNN context binary into node attribute EPContext->ep_cache_context in the Onnx skeleton model.
* "qnn_context_embed_mode", 1 means dump the QNN context binary into node attribute EPContext->ep_cache_context in the ONNX skeleton model.
* 0 means dump the QNN context binary into separate bin file and set the path to EPContext->ep_cache_context.
* The path is relative path to the Onnx skeleton model file.
* The path is relative path to the ONNX skeleton model file.
* "qnn_saver_path": File path to the QNN Saver backend library. If specified, QNN Saver will be enabled and will
* dump QNN API calls to disk for replay/debugging. QNN Saver produces incorrect model inference results and
* may alter model/EP partitioning. Use only for debugging.
* "qnn_context_priority": QNN context priority, options: "low", "normal", "normal_high", "high". Default to "normal".
* "htp_graph_finalization_optimization_mode": Set the optimization mode for graph finalization on the HTP backend. Available options:
* - "0": Default.
* - "1": Faster preparation time, less optimal graph.
* - "2": Longer preparation time, more optimal graph.
* - "3": Longest preparation time, most likely even more optimal graph. See QNN SDK documentation for specific details.
*
* SNPE supported keys:
* "runtime": SNPE runtime engine, options: "CPU", "CPU_FLOAT32", "GPU", "GPU_FLOAT32_16_HYBRID", "GPU_FLOAT16",
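
As an illustration of how these provider options are consumed, a hedged Python sketch ("model.onnx" is a placeholder path, and `QNNExecutionProvider` must be present in the build; keys and values are taken from the header comment above):

```python
import onnxruntime as ort

qnn_options = {
    "htp_performance_mode": "burst",
    "htp_graph_finalization_optimization_mode": "3",  # longest prep, most optimal graph
    "qnn_context_priority": "high",
}
session = ort.InferenceSession(
    "model.onnx",  # placeholder model path
    providers=["QNNExecutionProvider"],
    provider_options=[qnn_options],
)
```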
12 changes: 6 additions & 6 deletions onnxruntime/core/common/threadpool.cc
@@ -562,7 +562,7 @@ static ptrdiff_t CalculateParallelForBlock(const ptrdiff_t n, const Eigen::Tenso
   constexpr ptrdiff_t max_oversharding_factor = 4;
   ptrdiff_t block_size = Eigen::numext::mini(
       n,
-      Eigen::numext::maxi<ptrdiff_t>(Eigen::divup<ptrdiff_t>(n, max_oversharding_factor * num_threads), static_cast<ptrdiff_t>(block_size_f)));
+      Eigen::numext::maxi<ptrdiff_t>(Eigen::numext::div_ceil<ptrdiff_t>(n, max_oversharding_factor * num_threads), static_cast<ptrdiff_t>(block_size_f)));
   const ptrdiff_t max_block_size = Eigen::numext::mini(n, 2 * block_size);
 
   if (block_align) {
@@ -571,19 +571,19 @@ static ptrdiff_t CalculateParallelForBlock(const ptrdiff_t n, const Eigen::Tenso
     block_size = Eigen::numext::mini(n, new_block_size);
   }
 
-  ptrdiff_t block_count = Eigen::divup(n, block_size);
+  ptrdiff_t block_count = Eigen::numext::div_ceil(n, block_size);
 
   // Calculate parallel efficiency as fraction of total CPU time used for
   // computations:
   double max_efficiency =
-      static_cast<double>(block_count) / (Eigen::divup<ptrdiff_t>(block_count, num_threads) * num_threads);
+      static_cast<double>(block_count) / (Eigen::numext::div_ceil<ptrdiff_t>(block_count, num_threads) * num_threads);
 
   // Now try to increase block size up to max_block_size as long as it
   // doesn't decrease parallel efficiency.
   for (ptrdiff_t prev_block_count = block_count; max_efficiency < 1.0 && prev_block_count > 1;) {
     // This is the next block size that divides size into a smaller number
     // of blocks than the current block_size.
-    ptrdiff_t coarser_block_size = Eigen::divup(n, prev_block_count - 1);
+    ptrdiff_t coarser_block_size = Eigen::numext::div_ceil(n, prev_block_count - 1);
     if (block_align) {
       ptrdiff_t new_block_size = block_align(coarser_block_size);
       assert(new_block_size >= coarser_block_size);
Expand All @@ -593,11 +593,11 @@ static ptrdiff_t CalculateParallelForBlock(const ptrdiff_t n, const Eigen::Tenso
       break;  // Reached max block size. Stop.
     }
     // Recalculate parallel efficiency.
-    const ptrdiff_t coarser_block_count = Eigen::divup(n, coarser_block_size);
+    const ptrdiff_t coarser_block_count = Eigen::numext::div_ceil(n, coarser_block_size);
     assert(coarser_block_count < prev_block_count);
     prev_block_count = coarser_block_count;
     const double coarser_efficiency =
-        static_cast<double>(coarser_block_count) / (Eigen::divup<ptrdiff_t>(coarser_block_count, num_threads) * num_threads);
+        static_cast<double>(coarser_block_count) / (Eigen::numext::div_ceil<ptrdiff_t>(coarser_block_count, num_threads) * num_threads);
     if (coarser_efficiency + 0.01 >= max_efficiency) {
       // Taking it.
       block_size = coarser_block_size;
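
To clarify the heuristic above, here is a Python sketch (not part of the codebase): `ceil_div` stands in for `Eigen::numext::div_ceil`, and the block-alignment callback is omitted:

```python
def ceil_div(a: int, b: int) -> int:
    return -(-a // b)

def pick_block_size(n: int, initial_block: int, num_threads: int) -> int:
    block_size = max(1, min(n, initial_block))
    max_block_size = min(n, 2 * block_size)
    block_count = ceil_div(n, block_size)
    # Parallel efficiency: fraction of worker time spent computing, not idling.
    max_eff = block_count / (ceil_div(block_count, num_threads) * num_threads)
    prev_block_count = block_count
    while max_eff < 1.0 and prev_block_count > 1:
        # Next block size that yields strictly fewer blocks than before.
        coarser = ceil_div(n, prev_block_count - 1)
        if coarser > max_block_size:
            break  # reached max block size
        coarser_count = ceil_div(n, coarser)
        prev_block_count = coarser_count
        eff = coarser_count / (ceil_div(coarser_count, num_threads) * num_threads)
        if eff + 0.01 >= max_eff:  # take coarser blocks if efficiency holds
            block_size = coarser
            max_eff = max(max_eff, eff)
    return block_size

print(pick_block_size(n=1000, initial_block=10, num_threads=8))  # -> 14
```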
2 changes: 1 addition & 1 deletion onnxruntime/core/platform/windows/env.cc
@@ -32,7 +32,7 @@ limitations under the License.
#include "core/common/span_utils.h"
#include "core/platform/env.h"
#include "core/platform/scoped_resource.h"
#include "unsupported/Eigen/CXX11/src/ThreadPool/ThreadPoolInterface.h"
#include <unsupported/Eigen/CXX11/ThreadPool>
#include <wil/Resource.h>

#include "core/platform/path_lib.h" // for LoopDir()
Expand Down
2 changes: 2 additions & 0 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
@@ -971,6 +971,7 @@ class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain,
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Neg);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Neg);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Neg);
+class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, BFloat16, Neg);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Floor);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Floor);
 class ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Floor);
@@ -1855,6 +1856,7 @@ static Status RegisterCudaKernels(KernelRegistry& kernel_registry) {
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Neg)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Neg)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Neg)>,
+      BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, BFloat16, Neg)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, float, Floor)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, double, Floor)>,
       BuildKernelCreateInfo<ONNX_OPERATOR_TYPED_KERNEL_CLASS_NAME(kCudaExecutionProvider, kOnnxDomain, 13, MLFloat16, Floor)>,