Skip to content

Commit

Permalink
Initial PR for VSINPU execution provider (#20903)
Browse files Browse the repository at this point in the history
### Description
<!-- Describe your changes. -->
-It is an initial PR for VSINPU execution provider



### Motivation and Context
<!-- - Why is this change required? What problem does it solve?
- If it fixes an open issue, please link to the issue here. -->
- For support VeriSilicon hardware
- TIM-VX(Tensor Interface Module)
(https://github.com/VeriSilicon/TIM-VX) is an integrated software
solution by Verisilicon for our hardware(A311D/i.MX 8M Plus etc.)
design, it is easy to use Verisilicon’s hardware by simply connecting
onnxruntime with the TIM-VX API by this VSINPU execution provider.
  • Loading branch information
chenfeiyue-cfy authored Jun 29, 2024
1 parent 9007ede commit 56b36a5
Show file tree
Hide file tree
Showing 65 changed files with 5,096 additions and 3 deletions.
6 changes: 6 additions & 0 deletions cmake/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -101,6 +101,7 @@ option(onnxruntime_BUILD_OBJC "Build Objective-C library" OFF)
option(onnxruntime_USE_PREINSTALLED_EIGEN "Use pre-installed EIGEN. Need to provide eigen_SOURCE_PATH if turn this on." OFF)
option(onnxruntime_BUILD_BENCHMARKS "Build ONNXRuntime micro-benchmarks" OFF)
option(onnxruntime_USE_LLVM "Build TVM with LLVM" OFF)
option(onnxruntime_USE_VSINPU "Build with VSINPU support" OFF)

cmake_dependent_option(onnxruntime_USE_FLASH_ATTENTION "Build flash attention kernel for scaled dot product attention" ON "onnxruntime_USE_CUDA" OFF)
option(onnxruntime_USE_MEMORY_EFFICIENT_ATTENTION "Build memory efficient attention kernel for scaled dot product attention" ON)
Expand Down Expand Up @@ -797,6 +798,11 @@ if (onnxruntime_USE_RKNPU)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_RKNPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES rknpu)
endif()
if (onnxruntime_USE_VSINPU)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_VSINPU=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_VSINPU=1)
list(APPEND ONNXRUNTIME_PROVIDER_NAMES vsinpu)
endif()
if (onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND ORT_PROVIDER_FLAGS -DUSE_NNAPI=1)
list(APPEND ORT_PROVIDER_CMAKE_FLAGS -Donnxruntime_USE_NNAPI_BUILTIN=1)
Expand Down
1 change: 1 addition & 0 deletions cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -189,6 +189,7 @@ set(onnxruntime_INTERNAL_LIBRARIES
${PROVIDERS_SNPE}
${PROVIDERS_TVM}
${PROVIDERS_RKNPU}
${PROVIDERS_VSINPU}
${PROVIDERS_XNNPACK}
${PROVIDERS_WEBNN}
${PROVIDERS_AZURE}
Expand Down
32 changes: 32 additions & 0 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -80,6 +80,9 @@ endif()
if(onnxruntime_USE_RKNPU)
set(PROVIDERS_RKNPU onnxruntime_providers_rknpu)
endif()
if(onnxruntime_USE_VSINPU)
set(PROVIDERS_VSINPU onnxruntime_providers_vsinpu)
endif()
if(onnxruntime_USE_DML)
set(PROVIDERS_DML onnxruntime_providers_dml)
endif()
Expand Down Expand Up @@ -188,6 +191,35 @@ if (onnxruntime_USE_TVM)
include(onnxruntime_providers_tvm.cmake)
endif()

if (onnxruntime_USE_VSINPU)
add_definitions(-DUSE_VSINPU=1)
set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} -Wno-unused-parameter")
file(GLOB_RECURSE onnxruntime_providers_vsinpu_srcs
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/builders/*.cc"
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.h"
"${ONNXRUNTIME_ROOT}/core/providers/vsinpu/*.cc"
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.h"
"${ONNXRUNTIME_ROOT}/core/providers/shared/utils/utils.cc"
)
source_group(TREE ${ONNXRUNTIME_ROOT}/core FILES ${onnxruntime_providers_vsinpu_srcs})
add_library(onnxruntime_providers_vsinpu ${onnxruntime_providers_vsinpu_srcs})
onnxruntime_add_include_to_target(onnxruntime_providers_vsinpu
onnxruntime_common onnxruntime_framework onnx onnx_proto protobuf::libprotobuf-lite flatbuffers Boost::mp11
safeint_interface nsync::nsync_cpp)
add_dependencies(onnxruntime_providers_vsinpu ${onnxruntime_EXTERNAL_DEPENDENCIES})
set_target_properties(onnxruntime_providers_vsinpu PROPERTIES FOLDER "ONNXRuntime" LINKER_LANGUAGE CXX)
target_include_directories(onnxruntime_providers_vsinpu PRIVATE ${ONNXRUNTIME_ROOT} $ENV{TIM_VX_INSTALL}/include)

find_library(TIMVX_LIBRARY NAMES tim-vx PATHS $ENV{TIM_VX_INSTALL}/lib NO_DEFAULT_PATH)
if(TIMVX_LIBRARY)
target_link_libraries(onnxruntime_providers_vsinpu PRIVATE ${TIMVX_LIBRARY})
else()
message(FATAL_ERROR "Cannot find TIM-VX library!")
endif()

endif()

if (onnxruntime_USE_XNNPACK)
include(onnxruntime_providers_xnnpack.cmake)
endif()
Expand Down
5 changes: 5 additions & 0 deletions cmake/onnxruntime_unittests.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -546,6 +546,10 @@ if(onnxruntime_USE_NNAPI_BUILTIN)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_nnapi)
endif()

if(onnxruntime_USE_VSINPU)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_vsinpu)
endif()

if(onnxruntime_USE_JSEP)
list(APPEND onnxruntime_test_providers_dependencies onnxruntime_providers_js)
endif()
Expand Down Expand Up @@ -589,6 +593,7 @@ set(ONNXRUNTIME_TEST_LIBS
${onnxruntime_libs}
# CUDA, ROCM, TENSORRT, MIGRAPHX, DNNL, and OpenVINO are dynamically loaded at runtime
${PROVIDERS_NNAPI}
${PROVIDERS_VSINPU}
${PROVIDERS_JS}
${PROVIDERS_QNN}
${PROVIDERS_SNPE}
Expand Down
1 change: 1 addition & 0 deletions include/onnxruntime/core/graph/constants.h
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ constexpr const char* kXnnpackExecutionProvider = "XnnpackExecutionProvider";
constexpr const char* kWebNNExecutionProvider = "WebNNExecutionProvider";
constexpr const char* kCannExecutionProvider = "CANNExecutionProvider";
constexpr const char* kAzureExecutionProvider = "AzureExecutionProvider";
constexpr const char* kVSINPUExecutionProvider = "VSINPUExecutionProvider";

constexpr const char* kExecutionProviderSharedLibraryPath = "shared_lib_path";
constexpr const char* kExecutionProviderSharedLibraryEntry = "provider_factory_entry_point";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,34 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include "onnxruntime_c_api.h"

#ifdef __cplusplus
extern "C" {
#endif

ORT_API_STATUS(OrtSessionOptionsAppendExecutionProvider_VSINPU, _In_ OrtSessionOptions* options);

#ifdef __cplusplus
}
#endif
9 changes: 8 additions & 1 deletion onnxruntime/core/framework/node_unit.cc
Original file line number Diff line number Diff line change
Expand Up @@ -285,7 +285,7 @@ void NodeUnit::InitForSingleNode() {
const auto& output_defs = target_node_.OutputDefs();
const auto& node_attrs = target_node_.GetAttributes();
auto qlinear_type = GetQLinearOpType(target_node_);
if (qlinear_type == QLinearOpType::Unknown || IsVariadicQLinearOp(qlinear_type)) { // TODO, add variadic support
if (qlinear_type == QLinearOpType::Unknown) {
// Not a Qlinear op, add all inputs / outputs
auto add_all_io = [](std::vector<NodeUnitIODef>& defs,
const ConstPointerContainer<std::vector<NodeArg*>>& node_defs) {
Expand Down Expand Up @@ -351,6 +351,13 @@ void NodeUnit::InitForSingleNode() {
NodeUnitIODef::QuantParam{*input_defs[1],
input_defs.size() == 3 ? input_defs[2] : nullptr,
axis}});
} else if (IsVariadicQLinearOp(qlinear_type)) {
size_t input_num = (input_defs.size() - 2) / 3;
for (size_t i = 0; i < input_num; i++) {
inputs_.push_back(NodeUnitIODef{*input_defs[3 * i + 2], NodeUnitIODef::QuantParam{*input_defs[3 * i + 3],
input_defs[3 * i + 4]}});
}
outputs_.push_back(NodeUnitIODef{*output_defs[0], NodeUnitIODef::QuantParam{*input_defs[0], input_defs[1]}});
} else {
ORT_THROW("The QLinear op [", static_cast<uint8_t>(qlinear_type), "] is not supported");
}
Expand Down
1 change: 1 addition & 0 deletions onnxruntime/core/framework/utils.cc
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,7 @@ bool ProviderIsCpuBased(const std::string& provider_type) {
provider_type == onnxruntime::kVitisAIExecutionProvider ||
provider_type == onnxruntime::kOpenVINOExecutionProvider ||
provider_type == onnxruntime::kNnapiExecutionProvider ||
provider_type == onnxruntime::kVSINPUExecutionProvider ||
provider_type == onnxruntime::kAclExecutionProvider ||
provider_type == onnxruntime::kArmNNExecutionProvider ||
provider_type == onnxruntime::kRknpuExecutionProvider ||
Expand Down
8 changes: 8 additions & 0 deletions onnxruntime/core/providers/get_execution_providers.cc
Original file line number Diff line number Diff line change
Expand Up @@ -98,6 +98,14 @@ constexpr ProviderInfo kProvidersInPriorityOrder[] =
true,
#else
false,
#endif
},
{
kVSINPUExecutionProvider,
#ifdef USE_VSINPU
true,
#else
false,
#endif
},
{
Expand Down
4 changes: 4 additions & 0 deletions onnxruntime/core/providers/provider_factory_creators.h
Original file line number Diff line number Diff line change
Expand Up @@ -46,6 +46,10 @@
#include "core/providers/nnapi/nnapi_provider_factory_creator.h"
#endif

#if defined(USE_VSINPU)
#include "core/providers/vsinpu/vsinpu_provider_factory_creator.h"
#endif

#if defined(USE_JSEP)
#include "core/providers/js/js_provider_factory_creator.h"
#endif
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,130 @@
/****************************************************************************
*
* Copyright (c) 2023 Vivante Corporation
*
* Permission is hereby granted, free of charge, to any person obtaining a
* copy of this software and associated documentation files (the "Software"),
* to deal in the Software without restriction, including without limitation
* the rights to use, copy, modify, merge, publish, distribute, sublicense,
* and/or sell copies of the Software, and to permit persons to whom the
* Software is furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING
* FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER
* DEALINGS IN THE SOFTWARE.
*
*****************************************************************************/
#include <memory>
#include <vector>
#include <utility>
#include "core/providers/shared/utils/utils.h"
#include "core/providers/vsinpu/builders/impl/base_op_builder.h"

namespace onnxruntime {
namespace vsi {
namespace npu {
class ReluOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Relu Activation.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Relu>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class SigmoidOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Sigmoid Activation.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Sigmoid>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
class TanhOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Tanh activation.";
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::Tanh>();
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};

class LeakyReluOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating LeakyRelu activation.";
const auto& node = node_unit.GetNode();
NodeAttrHelper helper(node);
auto alpha = helper.Get("alpha", 1.0f);
auto op =
graph_ep->GetGraph()->CreateOperation<tim::vx::ops::LeakyRelu>(alpha);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};

class EluOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating Elu activation.";
const auto& node = node_unit.GetNode();
NodeAttrHelper helper(node);
auto alpha = helper.Get("alpha", 1.0f);
auto op =
graph_ep->GetGraph()->CreateOperation<tim::vx::ops::LeakyRelu>(alpha);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};

class HardSigmoidOpBuilder : public BaseOpBuilder {
public:
bool HandleBuildOp(vsi::npu::GraphEP* graph_ep,
std::vector<std::shared_ptr<tim::vx::Tensor>>& inputs,
std::vector<std::shared_ptr<tim::vx::Tensor>>& outputs,
const NodeUnit& node_unit) override {
LOGS_DEFAULT(VERBOSE) << "Creating HardSigmoid activation.";
const auto& node = node_unit.GetNode();
NodeAttrHelper helper(node);
auto alpha = helper.Get("alpha", 1.0f);
auto beta = helper.Get("beta", 1.0f);
auto op = graph_ep->GetGraph()->CreateOperation<tim::vx::ops::HardSigmoid>(
alpha, beta);
(*op).BindInputs(inputs).BindOutputs(outputs);
graph_ep->GetOps().push_back(std::move(op));
return true;
}
};
} // namespace npu

} // namespace vsi
} // namespace onnxruntime
Loading

0 comments on commit 56b36a5

Please sign in to comment.