Skip to content

Commit

Permalink
Merge branch 'main' of https://github.com/microsoft/onnxruntime into pythonop_inplace
Browse files Browse the repository at this point in the history
  • Loading branch information
pengwa committed Oct 8, 2023
2 parents 430be4b + c2bd5b7 commit a3f2f46
Show file tree
Hide file tree
Showing 5 changed files with 30 additions and 12 deletions.
2 changes: 1 addition & 1 deletion cmake/onnxruntime.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -240,7 +240,7 @@ install(TARGETS onnxruntime
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
FRAMEWORK DESTINATION ${CMAKE_INSTALL_BINDIR})


Expand Down
12 changes: 10 additions & 2 deletions cmake/onnxruntime_providers.cmake
Original file line number Diff line number Diff line change
Expand Up @@ -366,7 +366,7 @@ if (NOT onnxruntime_MINIMAL_BUILD AND NOT onnxruntime_EXTENDED_MINIMAL_BUILD
install(TARGETS onnxruntime_providers_shared
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR}
)
endif()

Expand Down Expand Up @@ -819,7 +819,7 @@ if (onnxruntime_USE_TENSORRT)
PUBLIC_HEADER DESTINATION ${CMAKE_INSTALL_INCLUDEDIR}/onnxruntime
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
RUNTIME DESTINATION ${CMAKE_INSTALL_LIBDIR})
RUNTIME DESTINATION ${CMAKE_INSTALL_BINDIR})
endif()

if (onnxruntime_USE_VITISAI)
Expand Down Expand Up @@ -1434,6 +1434,14 @@ if (onnxruntime_USE_MIGRAPHX)
message(STATUS "MIGRAPHX GPU STREAM SYNC is DISABLED")
endif()

if (onnxruntime_ENABLE_TRAINING_OPS)
onnxruntime_add_include_to_target(onnxruntime_providers_migraphx onnxruntime_training)
target_link_libraries(onnxruntime_providers_migraphx PRIVATE onnxruntime_training)
if (onnxruntime_ENABLE_TRAINING_TORCH_INTEROP)
onnxruntime_add_include_to_target(onnxruntime_providers_migraphx Python::Module)
endif()
endif()

install(TARGETS onnxruntime_providers_migraphx
ARCHIVE DESTINATION ${CMAKE_INSTALL_LIBDIR}
LIBRARY DESTINATION ${CMAKE_INSTALL_LIBDIR}
Expand Down
23 changes: 15 additions & 8 deletions onnxruntime/core/providers/cuda/cuda_execution_provider.cc
Original file line number Diff line number Diff line change
Expand Up @@ -373,7 +373,7 @@ Status CUDAExecutionProvider::OnRunStart() {
// always set CUDA device when session::Run() in case it runs in a worker thread
CUDA_RETURN_IF_ERROR(cudaSetDevice(GetDeviceId()));
if (IsGraphCaptureEnabled() && GetPerThreadContext().IsGraphCaptureAllowed() && !GetPerThreadContext().IsGraphCaptured()) {
LOGS_DEFAULT(INFO) << "Capturing the cuda graph for this model";
LOGS(*GetLogger(), INFO) << "Capturing the cuda graph for this model";
GetPerThreadContext().CaptureBegin();
}
return Status::OK();
Expand Down Expand Up @@ -2410,7 +2410,7 @@ static bool RNNNeedFallbackToCPU(const onnxruntime::Node& node,
return false;
}

static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node) {
static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node, const logging::Logger& logger) {
const auto& node_attributes = node.GetAttributes();
// Check attributes
for (auto& attr : node_attributes) {
Expand All @@ -2428,7 +2428,7 @@ static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node) {
int rank = pads_size / 2;
for (int i = 0; i < rank; i++) {
if (pads.Get(i) != pads.Get(i + rank)) {
LOGS_DEFAULT(WARNING) << "Dropping the ConvTranspose node: " << node.Name()
LOGS(logger, WARNING) << "Dropping the ConvTranspose node: " << node.Name()
<< " to CPU because it requires asymmetric padding which the CUDA EP"
<< " currently does not support";
return true;
Expand All @@ -2450,7 +2450,7 @@ static bool ConvTransposeNeedFallbackToCPU(const onnxruntime::Node& node) {
// symmetric padding.
// TODO: Remove this after we have supported asymmetric padding in the CUDA ConvTranspose kernel
if (auto_pad_attr == "SAME_UPPER" || auto_pad_attr == "SAME_LOWER") {
LOGS_DEFAULT(WARNING) << "Dropping the ConvTranspose node: " << node.Name()
LOGS(logger, WARNING) << "Dropping the ConvTranspose node: " << node.Name()
<< " to CPU because it uses the auto_pad attribute which may lead to asymmetric padding which"
<< " the CUDA EP currently does not support";
return true;
Expand Down Expand Up @@ -2487,20 +2487,26 @@ std::vector<std::unique_ptr<ComputeCapability>>
CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
const IKernelLookup& kernel_lookup) const {
InlinedVector<NodeIndex> candidates;
// A subset of the above vector. A subset of the tentative_nodes might be moved to CPU.
InlinedVector<NodeIndex> tentative_nodes;
const logging::Logger& logger = *GetLogger();
for (auto& node_index : graph.GetNodesInTopologicalOrder()) {
const auto* p_node = graph.GetNode(node_index);
if (p_node == nullptr)
continue;

const auto& node = *p_node;
if (!node.GetExecutionProviderType().empty()) {
if (node.GetExecutionProviderType() == kCudaExecutionProvider) {
candidates.push_back(node.Index());
}
continue;
}

const KernelCreateInfo* cuda_kernel_def = kernel_lookup.LookUpKernel(node);
// none of the provided registries has a CUDA kernel for this node
if (cuda_kernel_def == nullptr) {
LOGS_DEFAULT(INFO) << "CUDA kernel not found in registries for Op type: " << node.OpType() << " node name: " << node.Name();
LOGS(logger, INFO) << "CUDA kernel not found in registries for Op type: " << node.OpType() << " node name: " << node.Name();
continue;
}

Expand All @@ -2520,7 +2526,7 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,
not_supported = RNNNeedFallbackToCPU(node, activations_supported, node.OpType());
force_inside = !not_supported;
} else if ("ConvTranspose" == node.OpType()) {
not_supported = ConvTransposeNeedFallbackToCPU(node);
not_supported = ConvTransposeNeedFallbackToCPU(node, logger);
force_inside = !not_supported;
} else if ("Cast" == node.OpType()) {
not_supported = CastNeedFallbackToCPU(node);
Expand All @@ -2529,17 +2535,18 @@ CUDAExecutionProvider::GetCapability(const onnxruntime::GraphViewer& graph,

if (!force_inside && not_supported) {
if (not_supported) {
LOGS_DEFAULT(WARNING) << "CUDA kernel not supported. Fallback to CPU execution provider for Op type: " << node.OpType() << " node name: " << node.Name();
LOGS(logger, WARNING) << "CUDA kernel not supported. Fallback to CPU execution provider for Op type: " << node.OpType() << " node name: " << node.Name();
}
} else {
tentative_nodes.push_back(node.Index());
candidates.push_back(node.Index());
}
}

// For CUDA EP, exclude the subgraph that is preferred to be placed in CPU
// These are usually shape related computation subgraphs
// Following logic can be extended for other EPs
auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, candidates);
auto cpu_nodes = GetCpuPreferredNodes(graph, kernel_lookup, tentative_nodes);
std::vector<std::unique_ptr<ComputeCapability>> result;
for (auto& node_index : candidates) {
if (cpu_nodes.count(node_index) > 0)
Expand Down
3 changes: 3 additions & 0 deletions onnxruntime/core/providers/shared_library/provider_api.h
Original file line number Diff line number Diff line change
Expand Up @@ -350,6 +350,9 @@ void InitProviderOrtApi();
if ((logger).OutputIsEnabled(::onnxruntime::logging::Severity::k##severity, ::onnxruntime::logging::DataType::SYSTEM)) \
CREATE_MESSAGE(logger, severity, category, ::onnxruntime::logging::DataType::SYSTEM)->Stream()

#define LOGS(logger, severity) \
LOGS_CATEGORY(logger, severity, ::onnxruntime::logging::Category::onnxruntime)

#define LOGS_DEFAULT_CATEGORY(severity, category) \
LOGS_CATEGORY(::onnxruntime::logging::LoggingManager::DefaultLogger(), severity, category)

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@ pandas
scikit-learn
numpy==1.21.6 ; python_version < '3.11'
numpy==1.24.2 ; python_version >= '3.11'
transformers==v4.4.2
transformers==v4.16.1
rsa==4.9
tensorboard>=2.2.0,<2.5.0
h5py
Expand Down

0 comments on commit a3f2f46

Please sign in to comment.