From a747a00cd30b96c0e3742e1f03ad22cce65c492a Mon Sep 17 00:00:00 2001 From: Chi Lo <54722500+chilo-ms@users.noreply.github.com> Date: Thu, 18 Apr 2024 19:39:08 -0700 Subject: [PATCH 1/4] [TensorRT EP] Use protobuf with debug build on Windows (#20378) TRT EP implicitly uses oss_parser with debug build on Windows, therefore it should use protobuf rather than protobuf-lite. --- tools/ci_build/build.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/tools/ci_build/build.py b/tools/ci_build/build.py index d2119986dcc51..33dc403777de6 100644 --- a/tools/ci_build/build.py +++ b/tools/ci_build/build.py @@ -1243,11 +1243,16 @@ def generate_build_tree( "-Donnxruntime_USE_OPENVINO_AUTO=" + ("ON" if args.use_openvino.startswith("AUTO") else "OFF"), ] - # VitisAI and OpenVINO providers currently only support - # full_protobuf option. TensorRT provider only requires it if built with oss_parser + # VitisAI and OpenVINO providers currently only support full_protobuf option. + # TensorRT provider only requires it if built with oss_parser, and + # it implicitly uses oss_parser with debug build on Windows. + # + # Note: oss_parser will support protobuf-lite in TRT 10 GA, so TRT EP will fully + # support protobuf-lite then. if ( args.use_full_protobuf or (args.use_tensorrt and args.use_tensorrt_oss_parser) + or (args.use_tensorrt and is_windows() and "Debug" in args.config) or args.use_openvino or args.use_vitisai or args.gen_doc From b8c90beef2b1c9d9297365a9a8f49bb779c84147 Mon Sep 17 00:00:00 2001 From: Patrice Vignola Date: Thu, 18 Apr 2024 22:17:31 -0700 Subject: [PATCH 2/4] [DML EP] Add SimplifiedLayerNorm and SkipSimplifiedLayerNorm (#20326) --- docs/OperatorKernels.md | 4 ++- .../DmlOperatorLayerNormalization.cpp | 25 ++++++++++++++----- .../DmlOperatorSkipLayerNormalization.cpp | 11 +++++--- .../src/Operators/OperatorRegistration.cpp | 8 ++++-- .../dml/OperatorAuthorHelper/OperatorHelper.h | 2 ++ .../OperatorAuthorHelper/OperatorVersions.h | 2 ++ .../test/contrib_ops/layer_norm_test.cc | 10 +++++--- .../test/contrib_ops/skiplayernorm_op_test.cc | 5 +--- 8 files changed, 46 insertions(+), 21 deletions(-) diff --git a/docs/OperatorKernels.md b/docs/OperatorKernels.md index 5bae5ea626576..0ce4874a620bb 100644 --- a/docs/OperatorKernels.md +++ b/docs/OperatorKernels.md @@ -1058,7 +1058,7 @@ Do not modify directly.* |LSTM|*in* X:**T**
*in* W:**T**
*in* R:**T**
*in* B:**T**
*in* sequence_lens:**T1**
*in* initial_h:**T**
*in* initial_c:**T**
*in* P:**T**
*out* Y:**T**
*out* Y_h:**T**
*out* Y_c:**T**|14+|**T** = tensor(float), tensor(float16)| |||7+|**T** = tensor(float), tensor(float16)| |LayerNormalization|*in* X:**T**
*in* Scale:**T**
*in* B:**T**
*out* Y:**T**
*out* Mean:**U**
*out* InvStdDev:**U**

or

*in* X:**T**
*in* Scale:**V**
*in* B:**V**
*out* Y:**V**
*out* Mean:**U**
*out* InvStdDev:**U**|17+|**T** = tensor(float), tensor(float16)
**U** = tensor(float)| -|||1+|**T** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| +|||1+|**T** = tensor(float), tensor(float16)
**U** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| |LeakyRelu|*in* X:**T**
*out* Y:**T**|16+|**T** = tensor(float), tensor(float16)| |||6+|**T** = tensor(float), tensor(float16)| |Less|*in* A:**T**
*in* B:**T**
*out* C:**T1**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(bool)| @@ -1224,6 +1224,7 @@ Do not modify directly.* |||6+|**T** = tensor(float), tensor(float16)| |Sign|*in* input:**T**
*out* output:**T**|13+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| |||9+|**T** = tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)| +|SimplifiedLayerNormalization|*in* X:**T**
*in* scale:**V**
*out* Y:**V**
*out* inv_std_var:**U**|1+|**T** = tensor(float), tensor(float16)
**U** = tensor(float), tensor(float16)
**V** = tensor(float), tensor(float16)| |Sin|*in* input:**T**
*out* output:**T**|7+|**T** = tensor(float), tensor(float16)| |Sinh|*in* input:**T**
*out* output:**T**|9+|**T** = tensor(float), tensor(float16)| |Size|*in* data:**T**
*out* size:**T1**|19+|**T** = seq(tensor(bool)), seq(tensor(double)), seq(tensor(float)), seq(tensor(float16)), seq(tensor(int16)), seq(tensor(int32)), seq(tensor(int64)), seq(tensor(int8)), seq(tensor(uint16)), seq(tensor(uint32)), seq(tensor(uint64)), seq(tensor(uint8)), tensor(bool), tensor(double), tensor(float), tensor(float16), tensor(int16), tensor(int32), tensor(int64), tensor(int8), tensor(uint16), tensor(uint32), tensor(uint64), tensor(uint8)
**T1** = tensor(int64)| @@ -1306,6 +1307,7 @@ Do not modify directly.* |QuickGelu|*in* X:**T**
*out* Y:**T**|1+|**T** = tensor(float), tensor(float16)| |RotaryEmbedding|*in* input:**T**
*in* position_ids:**M**
*in* cos_cache:**T**
*in* sin_cache:**T**
*out* output:**T**|1+|**M** = tensor(int64)
**T** = tensor(float), tensor(float16)| |SkipLayerNormalization|*in* input:**T**
*in* skip:**T**
*in* gamma:**T**
*in* beta:**T**
*in* bias:**T**
*out* output:**T**
*out* mean:**U**
*out* inv_std_var:**U**
*out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)| +|SkipSimplifiedLayerNormalization|*in* input:**T**
*in* skip:**T**
*in* gamma:**T**
*in* bias:**T**
*out* output:**T**
*out* mean:**U**
*out* inv_std_var:**U**
*out* input_skip_bias_sum:**T**|1+|**T** = tensor(float), tensor(float16)| | | | | |**Operator Domain:** *com.microsoft.dml*|||| diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorLayerNormalization.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorLayerNormalization.cpp index 80e6fefc2fb80..9cf6163b61a24 100644 --- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorLayerNormalization.cpp +++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorLayerNormalization.cpp @@ -9,7 +9,7 @@ namespace Dml class DmlOperatorLayerNormalization : public DmlOperator { public: - DmlOperatorLayerNormalization(const MLOperatorKernelCreationContext& kernelCreationContext) + DmlOperatorLayerNormalization(const MLOperatorKernelCreationContext& kernelCreationContext, bool simplified) : DmlOperator(kernelCreationContext) { std::vector> kernelInputIndices = {0, 1, 2}; @@ -128,17 +128,18 @@ class DmlOperatorLayerNormalization : public DmlOperator outputCastOpDesc.Desc = &outputCastDesc; } - DML_MEAN_VARIANCE_NORMALIZATION1_OPERATOR_DESC operatorDesc = {}; + DML_MEAN_VARIANCE_NORMALIZATION2_OPERATOR_DESC operatorDesc = {}; operatorDesc.InputTensor = inputCastOpDesc.Desc ? &inputCastOutputDmlTensorDesc : &inputDesc; operatorDesc.ScaleTensor = scaleCastOpDesc.Desc ? &scaleCastOutputDmlTensorDesc : &scaleDesc; operatorDesc.BiasTensor = biasCastOpDesc.Desc ? &biasCastOutputDmlTensorDesc : (biasDesc.Desc ? &biasDesc : nullptr); operatorDesc.OutputTensor = outputCastOpDesc.Desc ? &outputCastOutputDmlTensorDesc : &outputDesc; operatorDesc.Axes = onnxAxes.data(); operatorDesc.AxisCount = gsl::narrow_cast(onnxAxes.size()); - operatorDesc.NormalizeVariance = true; + operatorDesc.UseMean = !simplified; + operatorDesc.UseVariance = true; operatorDesc.Epsilon = epsilon; operatorDesc.FusedActivation = nullptr; - DML_OPERATOR_DESC opDesc = { DML_OPERATOR_MEAN_VARIANCE_NORMALIZATION1, &operatorDesc }; + DML_OPERATOR_DESC opDesc = { DML_OPERATOR_MEAN_VARIANCE_NORMALIZATION2, &operatorDesc }; // Construct the graph std::vector opDescs; @@ -258,7 +259,19 @@ void CALLBACK QueryLayerNormalization(IMLOperatorSupportQueryContextPrivate* con *isSupported = context->GetOutputCount() == 1; } -DML_OP_DEFINE_CREATION_FUNCTION(LayerNormalization, DmlOperatorLayerNormalization); -DML_OP_DEFINE_CREATION_FUNCTION(LayerNormalization17, DmlOperatorLayerNormalization); +// A specific type of operation for registration. 
+template <bool simplified>
+class LayerNormalizationTemplate : public DmlOperatorLayerNormalization
+{
+public:
+    LayerNormalizationTemplate(const MLOperatorKernelCreationContext& kernelCreationContext)
+    :   DmlOperatorLayerNormalization(kernelCreationContext, simplified)
+    {
+    }
+};
+
+DML_OP_DEFINE_CREATION_FUNCTION(LayerNormalization, LayerNormalizationTemplate<false>);
+DML_OP_DEFINE_CREATION_FUNCTION(LayerNormalization17, LayerNormalizationTemplate<false>);
+DML_OP_DEFINE_CREATION_FUNCTION(SimplifiedLayerNormalization, LayerNormalizationTemplate<true>);
 } // namespace Dml
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSkipLayerNormalization.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSkipLayerNormalization.cpp
index 094c45a0e38e5..11954977228d4 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSkipLayerNormalization.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/DmlOperatorSkipLayerNormalization.cpp
@@ -6,6 +6,7 @@ namespace Dml
 {
+template <bool simplified>
 class DmlOperatorSkipLayerNormalization : public DmlOperator
 {
 public:
@@ -83,17 +84,18 @@ class DmlOperatorSkipLayerNormalization : public DmlOperator
         inputSkipBiasAddDesc.OutputTensor = &inputDesc;
         DML_OPERATOR_DESC inputSkipBiasAddOpDesc = { DML_OPERATOR_ELEMENT_WISE_ADD, &inputSkipBiasAddDesc };

-        DML_MEAN_VARIANCE_NORMALIZATION1_OPERATOR_DESC mvnDesc = {};
+        DML_MEAN_VARIANCE_NORMALIZATION2_OPERATOR_DESC mvnDesc = {};
         mvnDesc.InputTensor = &inputDesc;
         mvnDesc.ScaleTensor = &gammaDesc;
         mvnDesc.BiasTensor = betaDesc.Desc ? &betaDesc : nullptr;
         mvnDesc.OutputTensor = &outputDesc;
         mvnDesc.Axes = axes.data();
         mvnDesc.AxisCount = gsl::narrow_cast<uint32_t>(axes.size());
-        mvnDesc.NormalizeVariance = true;
+        mvnDesc.UseMean = !simplified;
+        mvnDesc.UseVariance = true;
         mvnDesc.Epsilon = epsilon;
         mvnDesc.FusedActivation = nullptr;
-        DML_OPERATOR_DESC mvnOpDesc = { DML_OPERATOR_MEAN_VARIANCE_NORMALIZATION1, &mvnDesc };
+        DML_OPERATOR_DESC mvnOpDesc = { DML_OPERATOR_MEAN_VARIANCE_NORMALIZATION2, &mvnDesc };

         // Construct the graph
         std::vector opDescs;
@@ -223,6 +225,7 @@ void CALLBACK QuerySkipLayerNormalization(IMLOperatorSupportQueryContextPrivate*
     *isSupported = true;
 }

-DML_OP_DEFINE_CREATION_FUNCTION(SkipLayerNormalization, DmlOperatorSkipLayerNormalization);
+DML_OP_DEFINE_CREATION_FUNCTION(SkipLayerNormalization, DmlOperatorSkipLayerNormalization<false>);
+DML_OP_DEFINE_CREATION_FUNCTION(SkipSimplifiedLayerNormalization, DmlOperatorSkipLayerNormalization<true>);
 } // namespace Dml
diff --git a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp
index 0d0a4149eefd5..2230ee74d9ff6 100644
--- a/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp
+++ b/onnxruntime/core/providers/dml/DmlExecutionProvider/src/Operators/OperatorRegistration.cpp
@@ -336,6 +336,8 @@ DML_OP_EXTERN_CREATION_FUNCTION(BiasAdd);
 DML_OP_EXTERN_CREATION_FUNCTION(LRN);
 DML_OP_EXTERN_CREATION_FUNCTION(MeanVarianceNormalization);
 DML_OP_EXTERN_CREATION_FUNCTION(GroupNorm);
+DML_OP_EXTERN_CREATION_FUNCTION(SimplifiedLayerNormalization);
+DML_OP_EXTERN_CREATION_FUNCTION(SkipSimplifiedLayerNormalization);
 DML_OP_EXTERN_CREATION_FUNCTION(LpNormalization);
 DML_OP_EXTERN_CREATION_FUNCTION(RNN);
 DML_OP_EXTERN_CREATION_FUNCTION(GRU);
@@ -548,7 +550,7 @@ constexpr static std::array typeNameListAttention = {"T", "M"};
 constexpr static
std::array typeNameListRotaryEmbedding = {"T", "M"}; constexpr static std::array typeNameListTwo = { "T1", "T2" }; constexpr static std::array typeNameListLayerNorm = { "T", "U" }; -constexpr static std::array typeNameListLayerNormContrib = { "T", "V" }; +constexpr static std::array typeNameListLayerNormContrib = { "T", "U", "V" }; constexpr static std::array typeNameListThree = { "T1", "T2", "T3" }; constexpr static std::array typeNameListFour = { "T1", "T2", "T3", "T4" }; constexpr static std::array typeNameListTopK = { "T", "I" }; @@ -612,7 +614,7 @@ constexpr static std::array supportedTypeListIntege constexpr static std::array supportedTypeListInteger8 = {SupportedTensorDataTypes::Int8|SupportedTensorDataTypes::UInt8 }; constexpr static std::array supportedTypeListRoiAlign = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Int32|SupportedTensorDataTypes::Int64 }; constexpr static std::array supportedTypeListArgMinMax = {SupportedTensorDataTypes::Float16to32|SupportedTensorDataTypes::Ints8to64}; -constexpr static std::array supportedTypeListLayerNormalizationContrib = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32}; +constexpr static std::array supportedTypeListLayerNormalizationContrib = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float16to32}; constexpr static std::array supportedTypeListLayerNormalization = {SupportedTensorDataTypes::Float16to32, SupportedTensorDataTypes::Float32}; constexpr static std::array supportedTypeListShape = {SupportedTensorDataTypes::All, SupportedTensorDataTypes::Int64}; constexpr static std::array supportedTypeListSize = {SupportedTensorDataTypes::All, SupportedTensorDataTypes::Int64}; @@ -1110,7 +1112,9 @@ constexpr static OperatorRegistrationInformation operatorRegistrationInformation {REG_INFO( 10, ConvInteger, typeNameListThree, supportedTypeListInteger, DmlGraphSupport::Supported)}, {REG_INFO( 11, DynamicQuantizeLinear, typeNameListTwo, supportedTypeListDynamicQuantizeLinear, DmlGraphSupport::Supported)}, {REG_INFO( 7, LayerNormalization, typeNameListLayerNormContrib, supportedTypeListLayerNormalizationContrib, DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QueryLayerNormalization)}, + {REG_INFO( 7, SimplifiedLayerNormalization, typeNameListLayerNormContrib, supportedTypeListLayerNormalizationContrib, DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QueryLayerNormalization)}, {REG_INFO_MS( 1, SkipLayerNormalization, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QuerySkipLayerNormalization)}, + {REG_INFO_MS( 1, SkipSimplifiedLayerNormalization, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported, requiredConstantCpuInputs(), std::nullopt, QuerySkipLayerNormalization)}, {REG_INFO_MS( 1, EmbedLayerNormalization, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, {REG_INFO_MS( 1, BiasSplitGelu, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, {REG_INFO_MS( 1, BiasAdd, typeNameListDefault, supportedTypeListFloat16to32, DmlGraphSupport::Supported)}, diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h index 090a70c73d97c..42468d7829c8f 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h +++ 
b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorHelper.h @@ -1613,7 +1613,9 @@ using ShapeInferenceHelper_GroupNorm = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_LayerNormalization = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_LayerNormalization17 = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_SkipLayerNormalization = SkipLayerNormHelper; +using ShapeInferenceHelper_SkipSimplifiedLayerNormalization = SkipLayerNormHelper; using ShapeInferenceHelper_EmbedLayerNormalization = EmbedLayerNormalizationHelper; +using ShapeInferenceHelper_SimplifiedLayerNormalization = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_LpNormalization = GetOutputShapeAsInputShapeHelper; using ShapeInferenceHelper_RNN = RecurrentHelper; using ShapeInferenceHelper_GRU = RecurrentHelper; diff --git a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h index e95f5edbf0b1c..9d2f88008185b 100644 --- a/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h +++ b/onnxruntime/core/providers/dml/OperatorAuthorHelper/OperatorVersions.h @@ -124,6 +124,7 @@ namespace OperatorHelper static const int sc_sinceVer_Upsample = 7; static const int sc_sinceVer_Xor = 7; static const int sc_sinceVer_LayerNormalization = 1; + static const int sc_sinceVer_SimplifiedLayerNormalization = 1; // Special operators static const int sc_sinceVer_MemcpyToHost = 1; @@ -454,6 +455,7 @@ namespace OperatorHelper static const int sc_sinceVer_MatMulIntegerToFloat = 1; static const int sc_sinceVer_MultiHeadAttention = 1; static const int sc_sinceVer_SkipLayerNormalization = 1; + static const int sc_sinceVer_SkipSimplifiedLayerNormalization = 1; static const int sc_sinceVer_EmbedLayerNormalization = 1; static const int sc_sinceVer_BiasSplitGelu = 1; static const int sc_sinceVer_NhwcConv = 1; diff --git a/onnxruntime/test/contrib_ops/layer_norm_test.cc b/onnxruntime/test/contrib_ops/layer_norm_test.cc index f75a60c4a753a..438a1100ca95c 100644 --- a/onnxruntime/test/contrib_ops/layer_norm_test.cc +++ b/onnxruntime/test/contrib_ops/layer_norm_test.cc @@ -43,8 +43,6 @@ static void TestLayerNorm(const std::vector& x_dims, // TODO keep_dims is not implemented, default behavior is to keep ones for reduced dimensions ASSERT_NE(keep_dims, 0); - const std::vector& stats_dims = keep_dims ? n_and_ones_dims : n_dims; - CompareOpTester test(op.c_str(), opset); test.AddAttribute("axis", axis); test.AddAttribute("keep_dims", keep_dims); @@ -65,16 +63,20 @@ static void TestLayerNorm(const std::vector& x_dims, } std::vector Y_data = FillZeros(n_x_m_dims); + test.AddOutput("output", n_x_m_dims, Y_data); + +#ifndef USE_DML + // DML doesn't support more than one output for these ops yet + const std::vector& stats_dims = keep_dims ? 
n_and_ones_dims : n_dims; std::vector mean_data = FillZeros(stats_dims); std::vector var_data = FillZeros(stats_dims); - test.AddOutput("output", n_x_m_dims, Y_data); - // the Main and InvStdDev outputs are training specific if (op.compare(SIMPLIFIED_LAYER_NORM_OP) != 0) { test.AddOutput("mean", stats_dims, mean_data); } test.AddOutput("var", stats_dims, var_data); +#endif #ifdef USE_CUDA test.CompareWithCPU(kCudaExecutionProvider); diff --git a/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc b/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc index bb56a5aba7f65..edf9064bb43c9 100644 --- a/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc +++ b/onnxruntime/test/contrib_ops/skiplayernorm_op_test.cc @@ -731,8 +731,6 @@ TEST(SkipLayerNormTest, SkipLayerNormBatch2_TokenCount) { true); } -// SkipSimplifiedLayerNorm has not been enabled for DML yet -#if !defined(USE_DML) TEST(SkipLayerNormTest, SkipSimplifiedLayerNormBatch1_Float16) { int batch_size = 1; int sequence_length = 2; @@ -768,9 +766,8 @@ TEST(SkipLayerNormTest, SkipSimplifiedLayerNormBatch1_Float16) { true, true); } -#endif -#if !defined(USE_ROCM) && !defined(USE_DML) +#if !defined(USE_ROCM) TEST(SkipLayerNormTest, SkipLayerNormBatch2_Skip_Broadcast_No_Batch_Size) { int batch_size = 2; int sequence_length = 2; From 12569626cb36d0a6475bd6ab7993da1a7b547787 Mon Sep 17 00:00:00 2001 From: Patrice Vignola Date: Thu, 18 Apr 2024 22:43:41 -0700 Subject: [PATCH 3/4] Update DML to 1.14.1 (#20380) ### Description ### Motivation and Context --- .pipelines/nuget_config/x64/packages.config | 2 +- .pipelines/nuget_config/x86/packages.config | 2 +- cmake/external/dml.cmake | 2 +- packages.config | 2 +- tools/nuget/generate_nuspec_for_native_nuget.py | 2 +- 5 files changed, 5 insertions(+), 5 deletions(-) diff --git a/.pipelines/nuget_config/x64/packages.config b/.pipelines/nuget_config/x64/packages.config index ba9722ce191a9..9066e13ee1c8d 100644 --- a/.pipelines/nuget_config/x64/packages.config +++ b/.pipelines/nuget_config/x64/packages.config @@ -1,6 +1,6 @@  - + diff --git a/.pipelines/nuget_config/x86/packages.config b/.pipelines/nuget_config/x86/packages.config index cf86084923eea..a8e5b35b28b36 100644 --- a/.pipelines/nuget_config/x86/packages.config +++ b/.pipelines/nuget_config/x86/packages.config @@ -1,6 +1,6 @@  - + diff --git a/cmake/external/dml.cmake b/cmake/external/dml.cmake index acb6f02b21acc..f74b694471203 100644 --- a/cmake/external/dml.cmake +++ b/cmake/external/dml.cmake @@ -41,7 +41,7 @@ if (NOT onnxruntime_USE_CUSTOM_DIRECTML) set(NUGET_CONFIG ${PROJECT_SOURCE_DIR}/../NuGet.config) set(PACKAGES_CONFIG ${PROJECT_SOURCE_DIR}/../packages.config) get_filename_component(PACKAGES_DIR ${CMAKE_CURRENT_BINARY_DIR}/../packages ABSOLUTE) - set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.14.0) + set(DML_PACKAGE_DIR ${PACKAGES_DIR}/Microsoft.AI.DirectML.1.14.1) # Restore nuget packages, which will pull down the DirectML redist package. 
add_custom_command(
diff --git a/packages.config b/packages.config
index fd9c33153a63b..3f3e4f5298881 100644
--- a/packages.config
+++ b/packages.config
@@ -1,6 +1,6 @@
-
+
diff --git a/tools/nuget/generate_nuspec_for_native_nuget.py b/tools/nuget/generate_nuspec_for_native_nuget.py
index 40a0df42b5340..670e595e03e40 100644
--- a/tools/nuget/generate_nuspec_for_native_nuget.py
+++ b/tools/nuget/generate_nuspec_for_native_nuget.py
@@ -219,7 +219,7 @@ def add_common_dependencies(xml_text, package_name, version):

 def generate_dependencies(xml_text, package_name, version):
-    dml_dependency = ''
+    dml_dependency = ''

     if package_name == "Microsoft.AI.MachineLearning":
         xml_text.append("")

From 9001c69b84898bbbe3529b0333a9d16df5f6469e Mon Sep 17 00:00:00 2001
From: Yueqing Zhang
Date: Fri, 19 Apr 2024 14:05:44 +0800
Subject: [PATCH 4/4] [VitisAI] Add Version Check. Requested by Microsoft (#20347)

### Description
Add a version to onnxruntime_providers_vitisai.dll so that onnxruntime_vitisai_ep.dll can check whether the version is compatible. To make sure old onnxruntime_vitisai_ep.dll binaries still work, we offset the API struct by the new version fields.

### Motivation and Context
This is a direct request from Microsoft. The following is the problem we are trying to solve:

How would you describe the dependency between (a) onnxruntime_vitisai_ep.dll and (b) onnxruntime_providers_vitisai.dll? E.g. for each version of (a) there is a minimum required version of (b), or for each version of (b) there is a minimum required version of (a). Please note that in practice we won't be able to use the exact version of ORT/EP that you tested against (because we might need to update ORT for other reasons), but we might be able to accommodate some version constraints that you specify. As we approach shipping, we'll lock the version of ORT/EP to allow for stabilization and more detailed testing (and work with you if it needs to be updated).
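To illustrate the intended handshake, the sketch below shows the kind of gate onnxruntime_vitisai_ep.dll could run before touching the rest of the struct. The helper name, the expected-version constants, the include path, and the acceptance policy (exact major match, minimum minor) are illustrative assumptions; only the magic/major/minor/patch fields come from this patch.

```cpp
// Sketch only: client-side compatibility check against the new version fields.
#include <cstdint>
#include <cstring>

#include "vaip/vaip_ort_api.h"  // assumed include path for vaip_core::OrtApiForVaip

constexpr uint32_t kExpectedMajor = 1;  // breaking changes bump 'major'
constexpr uint32_t kMinMinor = 0;       // lowest 'minor' carrying the features we use

bool IsCompatibleApi(const vaip_core::OrtApiForVaip* api) {
  // The magic word distinguishes a struct that carries version fields from an
  // older layout where the first member was host_.
  char tag[4];
  std::memcpy(tag, &api->magic, sizeof(tag));
  if (std::memcmp(tag, "VAIP", sizeof(tag)) != 0) {
    return false;
  }
  return api->major == kExpectedMajor && api->minor >= kMinMinor;
}
```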
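The change to create_org_api_hook in global_api.cc below relies on a layout trick: an onnxruntime_vitisai_ep.dll built before this patch does not export vaip_get_version and was compiled against a struct whose first member is host_, so handing it the address of host_ keeps every member offset it knows about valid. A minimal sketch of that idea, using simplified stand-in types rather than the real VAIP definitions:

```cpp
// Simplified stand-ins (not the actual OrtApiForVaip definitions) showing why
// returning &api.host_ preserves the offsets an old client was compiled with.
#include <cstdint>

struct OldApi {   // layout an old onnxruntime_vitisai_ep.dll expects
  void* host_;
  const void* ort_api_;
};

struct NewApi {   // layout after this patch: version fields prepended
  uint32_t magic, major, minor, patch;
  void* host_;
  const void* ort_api_;
};

void* ApiForClient(NewApi& api, bool client_exports_vaip_get_version) {
  // New clients read the version fields first; old clients get a pointer that
  // starts at host_, so offsets relative to it match the OldApi layout.
  return client_exports_vaip_get_version ? static_cast<void*>(&api)
                                         : static_cast<void*>(&api.host_);
}
```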
---
 .../core/providers/vitisai/imp/global_api.cc        | 18 +++++++++++++++++-
 .../vitisai/include/vaip/vaip_ort_api.h             |  8 ++++++++
 2 files changed, 25 insertions(+), 1 deletion(-)

diff --git a/onnxruntime/core/providers/vitisai/imp/global_api.cc b/onnxruntime/core/providers/vitisai/imp/global_api.cc
index eba3230d283cf..ef08566b06dce 100644
--- a/onnxruntime/core/providers/vitisai/imp/global_api.cc
+++ b/onnxruntime/core/providers/vitisai/imp/global_api.cc
@@ -52,6 +52,7 @@ struct OrtVitisAIEpAPI {
       const char* json_config);
   std::vector>* (*compile_onnx_model_with_options)(
       const std::string& model_path, const onnxruntime::Graph& graph, const onnxruntime::ProviderOptions& options);
+  uint32_t (*vaip_get_version)();
   void Ensure() {
     if (handle_) return;
@@ -65,6 +66,8 @@ struct OrtVitisAIEpAPI {
       ::onnxruntime::LogRuntimeError(0, status1, __FILE__, static_cast(__FUNCTION__), __LINE__);
       ORT_THROW(status1);
     }
+    std::ignore = env.GetSymbolFromLibrary(handle_, "vaip_get_version",
+                                           (void**)&vaip_get_version);
   }

 private:
@@ -177,8 +180,17 @@ void initialize_vitisai_ep() {
   create_kernel_registry(s_domains_vitisaiep);
 }

+static void set_version_info(vaip_core::OrtApiForVaip& api) {
+  const char* magic = "VAIP";
+  std::memcpy(reinterpret_cast(&api.magic), magic, sizeof(api.magic));
+  api.major = 1u;
+  api.minor = 0u;
+  api.patch = 0u;
+}
+
 vaip_core::OrtApiForVaip* create_org_api_hook() {
   InitProviderOrtApi();
+  set_version_info(the_global_api);
   the_global_api.host_ = Provider_GetHost();
   assert(Ort::Global::api_ != nullptr);
   the_global_api.ort_api_ = Ort::Global::api_;
@@ -359,5 +371,9 @@ vaip_core::OrtApiForVaip* create_org_api_hook() {
   the_global_api.get_lib_id = []() -> vaip_core::DllSafe<std::string> { return vaip_core::DllSafe(std::string(GIT_COMMIT_ID)); };
-  return &the_global_api;
+  if (!s_library_vitisaiep.vaip_get_version) {
+    return reinterpret_cast<vaip_core::OrtApiForVaip*>(&(the_global_api.host_));
+  } else {
+    return &the_global_api;
+  }
 }
diff --git a/onnxruntime/core/providers/vitisai/include/vaip/vaip_ort_api.h b/onnxruntime/core/providers/vitisai/include/vaip/vaip_ort_api.h
index ae5f71d66269c..51de58fbdd463 100644
--- a/onnxruntime/core/providers/vitisai/include/vaip/vaip_ort_api.h
+++ b/onnxruntime/core/providers/vitisai/include/vaip/vaip_ort_api.h
@@ -13,6 +13,14 @@ struct OrtApi;

 namespace vaip_core {
 struct OrtApiForVaip {
+  uint32_t magic;  // 'VAIP' or something else to make sure the following fields
+                   // are not garbage.
+  uint32_t major;  // bump this field for changes that are not backward compatible or
+                   // that represent a change in direction for the project
+  uint32_t minor;  // bump this field for adding new features without breaking
+                   // existing behavior
+  uint32_t patch;  // bump this field for fixing some bugs but not introducing
+                   // new functionality
   onnxruntime::ProviderHost* host_;
   const OrtApi* ort_api_;  // model