ExecutionProvider API refactor - make GenerateMetaDefId a standalone function, decouple it from EP #18977

Merged 17 commits on Jan 26, 2024
Changes from 3 commits
35 changes: 3 additions & 32 deletions include/onnxruntime/core/framework/execution_provider.h
@@ -59,14 +59,11 @@

class IExecutionProvider {
protected:
IExecutionProvider(const std::string& type, bool use_metadef_id_creator = false)
: IExecutionProvider(type, OrtDevice(), use_metadef_id_creator) {}
IExecutionProvider(const std::string& type)

[cpplint] include/onnxruntime/core/framework/execution_provider.h:62: Single-parameter constructors should be marked explicit. [runtime/explicit] [5]
: IExecutionProvider(type, OrtDevice()) {}

IExecutionProvider(const std::string& type, OrtDevice device, bool use_metadef_id_creator = false)
IExecutionProvider(const std::string& type, OrtDevice device)
: default_device_(device), type_{type} {
if (use_metadef_id_creator) {
metadef_id_generator_ = std::make_unique<ModelMetadefIdGenerator>();
}
}

/*
@@ -274,19 +271,6 @@
return logger_;
}

/** Generate a unique id that can be used in a MetaDef name. Values are unique for a model instance.
The model hash is also returned if you wish to include that in the MetaDef name to ensure uniqueness across models.
@param graph_viewer[in] Graph viewer that GetCapability was called with. Can be for the main graph or nested graph.
@param model_hash[out] Returns the hash for the main (i.e. top level) graph in the model.
This is created using the model path if available,
or the model input names and the output names from all nodes in the main graph.
@remarks e.g. the TensorRT Execution Provider is used in multiple sessions and the underlying infrastructure caches
compiled kernels, so the name must be unique and deterministic across models and sessions.
NOTE: Ideally this would be a protected method, but to work across the EP bridge it has to be public and
virtual, and ModelMetadefIdGenerator must be defined in the header as well.
*/
virtual int GenerateMetaDefId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash) const;

virtual std::unique_ptr<profiling::EpProfiler> GetProfiler() {
return {};
}
@@ -331,18 +315,5 @@

// It will be set when this object is registered to a session
const logging::Logger* logger_ = nullptr;

// helper to generate ids that are unique to model and deterministic, even if the execution provider is shared across
// multiple sessions.
class ModelMetadefIdGenerator {
public:
int GenerateId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash);

private:
std::unordered_map<HashValue, HashValue> main_graph_hash_; // map graph instance hash to model contents hash
std::unordered_map<HashValue, int> model_metadef_id_; // current unique id for model
};

std::unique_ptr<ModelMetadefIdGenerator> metadef_id_generator_;
};
} // namespace onnxruntime
73 changes: 0 additions & 73 deletions onnxruntime/core/framework/execution_provider.cc
@@ -35,77 +35,4 @@ common::Status IExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
}

#endif

int IExecutionProvider::ModelMetadefIdGenerator::GenerateId(const onnxruntime::GraphViewer& graph_viewer,
HashValue& model_hash) {
model_hash = 0;

// find the top level graph
const Graph* cur_graph = &graph_viewer.GetGraph();
while (cur_graph->IsSubgraph()) {
cur_graph = cur_graph->ParentGraph();
}

uint32_t instance_hash[4] = {0, 0, 0, 0};

const Graph& main_graph = *cur_graph;

// hash the bytes in the Graph instance. we can't just use the address as a new Graph instance may use
// the same memory (unit tests prove this can occur). the raw bytes of the Graph instance should be a unique
// fingerprint for the instance that can be used as the key to the hash of the model path/contents.
MurmurHash3::x86_128(&main_graph, gsl::narrow_cast<int32_t>(sizeof(Graph)), instance_hash[0], &instance_hash);
HashValue graph_instance_hash = instance_hash[0] | (uint64_t(instance_hash[1]) << 32);

// if we've already hashed this main graph instance use the cached value
auto entry = main_graph_hash_.find(graph_instance_hash);
if (entry != main_graph_hash_.cend()) {
model_hash = entry->second;
} else {
uint32_t hash[4] = {0, 0, 0, 0};

// prefer path the model was loaded from
// this may not be available if the model was loaded from a stream or in-memory bytes
const auto& model_path_str = main_graph.ModelPath().ToPathString();
if (!model_path_str.empty()) {
MurmurHash3::x86_128(model_path_str.data(), gsl::narrow_cast<int32_t>(model_path_str.size()), hash[0], &hash);
} else {
auto hash_str = [&hash](const std::string& str) {
MurmurHash3::x86_128(str.data(), gsl::narrow_cast<int32_t>(str.size()), hash[0], &hash);
};

// fingerprint the main graph by hashing graph inputs and the ordered outputs from each node
for (const auto* node_arg : main_graph.GetInputsIncludingInitializers()) {
hash_str(node_arg->Name());
}

// note: process nodes in order defined in model to be deterministic
for (const auto& node : main_graph.Nodes()) {
for (const auto* node_arg : node.OutputDefs()) {
if (node_arg->Exists()) {
hash_str(node_arg->Name());
}
}
}
}

model_hash = hash[0] | (uint64_t(hash[1]) << 32);

main_graph_hash_[graph_instance_hash] = model_hash;
}

// return the current unique id, and increment to update
return model_metadef_id_[model_hash]++;
}

int IExecutionProvider::GenerateMetaDefId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash) const {
ORT_ENFORCE(metadef_id_generator_,
"IExecutionProvider constructor must be called with true for use_metadef_id_creator");

// if the EP is shared across multiple sessions there's a very small potential for concurrency issues.
// use a lock when generating an id to be paranoid
static OrtMutex mutex;
std::lock_guard<OrtMutex> lock(mutex);
return metadef_id_generator_->GenerateId(graph_viewer, model_hash);
}

} // namespace onnxruntime
77 changes: 77 additions & 0 deletions onnxruntime/core/framework/model_metadef_id_generator.cc
@@ -0,0 +1,77 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.
#include <unordered_map>
#include "model_metadef_id_generator.h"

[cpplint] onnxruntime/core/framework/model_metadef_id_generator.cc:4: Include the directory when naming header files [build/include_subdir] [4]
#include "core/platform/ort_mutex.h"
#include "core/graph/graph_viewer.h"
#include "core/framework/murmurhash3.h"

namespace onnxruntime {
int GenerateMetaDefId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash) {
static std::unordered_map<HashValue, HashValue> main_graph_hash_; // map graph instance hash to model contents hash
static std::unordered_map<HashValue, int> model_metadef_id_; // current unique id for model

// if the EP is shared across multiple sessions there's a very small potential for concurrency issues.
// use a lock when generating an id to be paranoid
static OrtMutex mutex;
std::lock_guard<OrtMutex> lock(mutex);
model_hash = 0;

// find the top level graph
const Graph* cur_graph = &graph_viewer.GetGraph();
while (cur_graph->IsSubgraph()) {
cur_graph = cur_graph->ParentGraph();
}

uint32_t instance_hash[4] = {0, 0, 0, 0};

const Graph& main_graph = *cur_graph;

// hash the bytes in the Graph instance. we can't just use the address as a new Graph instance may use
// the same memory (unit tests prove this can occur). the raw bytes of the Graph instance should be a unique
// fingerprint for the instance that can be used as the key to the hash of the model path/contents.
MurmurHash3::x86_128(&main_graph, gsl::narrow_cast<int32_t>(sizeof(Graph)), instance_hash[0], &instance_hash);
HashValue graph_instance_hash = instance_hash[0] | (uint64_t(instance_hash[1]) << 32);

// if we've already hashed this main graph instance use the cached value
auto entry = main_graph_hash_.find(graph_instance_hash);
if (entry != main_graph_hash_.cend()) {
model_hash = entry->second;
} else {
uint32_t hash[4] = {0, 0, 0, 0};

// prefer path the model was loaded from
// this may not be available if the model was loaded from a stream or in-memory bytes
const auto& model_path_str = main_graph.ModelPath().ToPathString();
if (!model_path_str.empty()) {
MurmurHash3::x86_128(model_path_str.data(), gsl::narrow_cast<int32_t>(model_path_str.size()), hash[0], &hash);
} else {
auto hash_str = [&hash](const std::string& str) {

[cpplint] onnxruntime/core/framework/model_metadef_id_generator.cc:49: Add #include <string> for string [build/include_what_you_use] [4]
MurmurHash3::x86_128(str.data(), gsl::narrow_cast<int32_t>(str.size()), hash[0], &hash);
};

// fingerprint the main graph by hashing graph inputs and the ordered outputs from each node
for (const auto* node_arg : main_graph.GetInputsIncludingInitializers()) {
hash_str(node_arg->Name());
}

// note: process nodes in order defined in model to be deterministic
for (const auto& node : main_graph.Nodes()) {
for (const auto* node_arg : node.OutputDefs()) {
if (node_arg->Exists()) {
hash_str(node_arg->Name());
}
}
}
}

model_hash = hash[0] | (uint64_t(hash[1]) << 32);

main_graph_hash_[graph_instance_hash] = model_hash;
}

// return the current unique id, and increment to update
return model_metadef_id_[model_hash]++;
}

} // namespace onnxruntime
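
Note: a quick sketch of the intended behavior of the new standalone function: consecutive calls for the same model return incrementing ids and a stable model hash. Illustrative only (a hypothetical helper, not part of this PR), assuming a GraphViewer over an already-loaded model:

#include <cassert>

#include "core/framework/model_metadef_id_generator.h"

namespace onnxruntime {
// Hypothetical demo helper: two calls for the same model report the same
// model hash and consecutive metadef ids (assuming no interleaved callers).
void DemoMetaDefIds(const GraphViewer& graph_viewer) {
  HashValue hash_a = 0;
  HashValue hash_b = 0;
  const int first = GenerateMetaDefId(graph_viewer, hash_a);
  const int second = GenerateMetaDefId(graph_viewer, hash_b);
  assert(hash_b == hash_a);     // same main graph -> same model hash
  assert(second == first + 1);  // id increments per model hash
}
}  // namespace onnxruntime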
24 changes: 24 additions & 0 deletions onnxruntime/core/framework/model_metadef_id_generator.h
@@ -0,0 +1,24 @@
// Copyright (c) Microsoft Corporation. All rights reserved.
// Licensed under the MIT License.

#pragma once
#include "core/common/basic_types.h"
namespace onnxruntime {
class GraphViewer;

// helper to generate ids that are unique to model and deterministic, even if the execution provider is shared across
// multiple sessions.
/** Generate a unique id that can be used in a MetaDef name. Values are unique for a model instance.
The model hash is also returned if you wish to include that in the MetaDef name to ensure uniqueness across models.
@param graph_viewer[in] Graph viewer that GetCapability was called with. Can be for the main graph or nested graph.
@param model_hash[out] Returns the hash for the main (i.e. top level) graph in the model.
This is created using the model path if available,
or the model input names and the output names from all nodes in the main graph.
@remarks e.g. the TensorRT Execution Provider is used in multiple sessions and the underlying infrastructure caches
compiled kernels, so the name must be unique and deterministic across models and sessions.
NOTE: Ideally this would be a protected method, but to work across the EP bridge it has to be public and
virtual, and ModelMetadefIdGenerator must be defined in the header as well.
*/
int GenerateMetaDefId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash);

} // namespace onnxruntime
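
For context, a minimal sketch of how an execution provider might use the standalone function to build a MetaDef name (the "MyEP" prefix and helper name are hypothetical, not from this PR):

#include <string>

#include "core/framework/model_metadef_id_generator.h"

namespace onnxruntime {
// Hypothetical helper: the model hash keeps the name unique across models,
// and the generated id keeps it unique within a model instance.
std::string MakeMetaDefName(const GraphViewer& graph_viewer) {
  HashValue model_hash = 0;
  const int metadef_id = GenerateMetaDefId(graph_viewer, model_hash);
  return "MyEP_" + std::to_string(model_hash) + "_" + std::to_string(metadef_id);
}
}  // namespace onnxruntime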
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/cann/cann_execution_provider.cc
@@ -19,6 +19,7 @@
#include "core/providers/cann/cann_fwd.h"
#include "core/providers/cann/cann_stream_handle.h"
#include "core/providers/cann/npu_data_transfer.h"
#include "core/framework/model_metadef_id_generator.h"

using onnxruntime::cann::BuildONNXModel;
using onnxruntime::cann::CannModelPreparation;
@@ -1029,7 +1030,7 @@
} // namespace cann

CANNExecutionProvider::CANNExecutionProvider(const CANNExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kCannExecutionProvider, OrtDevice(OrtDevice::NPU, OrtDevice::MemType::DEFAULT, info.device_id), true}, info_{info} {
: IExecutionProvider{onnxruntime::kCannExecutionProvider, OrtDevice(OrtDevice::NPU, OrtDevice::MemType::DEFAULT, info.device_id)}, info_{info} {

[cpplint] onnxruntime/core/providers/cann/cann_execution_provider.cc:1033: Lines should be <= 120 characters long [whitespace/line_length] [2]
InitProviderOrtApi();

CANN_CALL_THROW(aclrtSetDevice(info_.device_id));
@@ -11,6 +11,7 @@
#include "core/providers/coreml/builders/helper.h"
#include "core/providers/partitioning_utils.h"
#include "core/session/onnxruntime_cxx_api.h"
#include "core/framework/model_metadef_id_generator.h"

#ifdef __APPLE__
#include "core/providers/coreml/builders/model_builder.h"
@@ -24,7 +25,7 @@ namespace onnxruntime {
constexpr const char* COREML = "CoreML";

CoreMLExecutionProvider::CoreMLExecutionProvider(uint32_t coreml_flags)
: IExecutionProvider{onnxruntime::kCoreMLExecutionProvider, true},
: IExecutionProvider{onnxruntime::kCoreMLExecutionProvider},
coreml_flags_(coreml_flags) {
}

3 changes: 2 additions & 1 deletion onnxruntime/core/providers/dnnl/dnnl_execution_provider.cc
@@ -20,6 +20,7 @@
#include "core/providers/dnnl/dnnl_fwd.h"
#include "core/providers/dnnl/dnnl_node_capability.h"
#include "core/providers/dnnl/subgraph/dnnl_subgraph_transformer.h"
#include "core/framework/model_metadef_id_generator.h"

#define ORT_API_MANUAL_INIT
#include "core/session/onnxruntime_cxx_api.h"
@@ -30,7 +31,7 @@ constexpr const char* DNNL = "Dnnl";
constexpr const char* DNNL_CPU = "DnnlCpu";

DnnlExecutionProvider::DnnlExecutionProvider(const DnnlExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kDnnlExecutionProvider, true},
: IExecutionProvider{onnxruntime::kDnnlExecutionProvider},
info_(info) {
InitProviderOrtApi();

2 changes: 1 addition & 1 deletion onnxruntime/core/providers/js/js_execution_provider.cc
@@ -680,7 +680,7 @@ std::unique_ptr<KernelRegistry> RegisterKernels() {
using namespace js;

JsExecutionProvider::JsExecutionProvider(const JsExecutionProviderInfo& info)
: IExecutionProvider{kJsExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0), true},
: IExecutionProvider{kJsExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, 0)},
preferred_data_layout_{info.data_layout} {
}

@@ -19,6 +19,7 @@

// TODO: find a better way to share this
#include "core/providers/rocm/rocm_stream_handle.h"
#include "core/framework/model_metadef_id_generator.h"

#if defined(_MSC_VER)
#pragma warning(disable : 4244 4245)
@@ -102,7 +103,7 @@
}

MIGraphXExecutionProvider::MIGraphXExecutionProvider(const MIGraphXExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, info.device_id), true}, device_id_(info.device_id) {
: IExecutionProvider{onnxruntime::kMIGraphXExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, info.device_id)}, device_id_(info.device_id) {

[cpplint] onnxruntime/core/providers/migraphx/migraphx_execution_provider.cc:106: Lines should be <= 120 characters long [whitespace/line_length] [2]
InitProviderOrtApi();
// Set GPU device to be used
HIP_CALL_THROW(hipSetDevice(device_id_));
@@ -19,6 +19,7 @@
#include "core/providers/partitioning_utils.h"
#include "core/providers/shared/node_unit/node_unit.h"
#include "core/session/onnxruntime_cxx_api.h"
#include "core/framework/model_metadef_id_generator.h"

namespace onnxruntime {

@@ -50,7 +51,7 @@ std::unordered_set<std::string> GetPartitioningStopOps(const optional<std::strin

NnapiExecutionProvider::NnapiExecutionProvider(uint32_t nnapi_flags,
const optional<std::string>& partitioning_stop_ops_list)
: IExecutionProvider{onnxruntime::kNnapiExecutionProvider, true},
: IExecutionProvider{onnxruntime::kNnapiExecutionProvider},
nnapi_flags_(nnapi_flags),
partitioning_stop_ops_(GetPartitioningStopOps(partitioning_stop_ops_list)) {
nnapi_handle_ = NnApiImplementation();
2 changes: 1 addition & 1 deletion onnxruntime/core/providers/partitioning_utils.h
@@ -40,7 +40,7 @@ using OnGroupClosedFn = std::function<bool(const std::vector<const Node*>& group

/**
Called to create a metadef name.
Most likely should call IExecutionProvider::GenerateMetaDefId.
Most likely should call GenerateMetaDefId.
See onnxruntime/test/providers/internal_testing/internal_testing_execution_provider.cc for example usage.

@return The metadef name.
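
A hedged sketch of such a callback, wiring the standalone GenerateMetaDefId into the partitioning utilities (the exact std::function signature in this header may differ; "MyEP" is a hypothetical prefix, and graph_viewer is assumed to be in scope in the EP's GetCapability):

// Capture the graph viewer and let the partitioning utilities call back
// for a fresh, deterministic metadef name.
auto gen_metadef_name = [&graph_viewer]() -> std::string {
  HashValue model_hash = 0;
  const int metadef_id = GenerateMetaDefId(graph_viewer, model_hash);
  return "MyEP_" + std::to_string(model_hash) + "_" + std::to_string(metadef_id);
};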
3 changes: 2 additions & 1 deletion onnxruntime/core/providers/qnn/qnn_execution_provider.cc
@@ -17,6 +17,7 @@
#include "core/providers/qnn/builder/op_builder_factory.h"
#include "core/providers/qnn/builder/qnn_def.h"
#include "core/providers/qnn/builder/onnx_ctx_model_helper.h"
#include "core/framework/model_metadef_id_generator.h"

namespace onnxruntime {

@@ -110,7 +111,7 @@ void QNNExecutionProvider::ParseHtpGraphFinalizationOptimizationMode(const std::

QNNExecutionProvider::QNNExecutionProvider(const ProviderOptions& provider_options_map,
const SessionOptions* session_options)
: IExecutionProvider{onnxruntime::kQnnExecutionProvider, true} {
: IExecutionProvider{onnxruntime::kQnnExecutionProvider} {
if (session_options) {
disable_cpu_ep_fallback_ = session_options->config_options.GetConfigOrDefault(
kOrtSessionOptionsDisableCPUEPFallback, "0") == "1";
@@ -329,10 +329,6 @@ common::Status IExecutionProvider::Compile(const std::vector<FusedNodeAndGraph>&
return g_host->IExecutionProvider__Compile(this, fused_nodes_and_graphs, node_compute_funcs);
}

int IExecutionProvider::GenerateMetaDefId(const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash) const {
return g_host->IExecutionProvider__GenerateMetaDefId(this, graph_viewer, model_hash);
}

#ifdef USE_TENSORRT
std::unique_ptr<IAllocator> CreateCUDAAllocator(int16_t device_id, const char* name) {
return g_host->CreateCUDAAllocator(device_id, name);
@@ -227,8 +227,6 @@ struct ProviderHost {

virtual common::Status IExecutionProvider__Compile(IExecutionProvider* p, const std::vector<IExecutionProvider::FusedNodeAndGraph>& fused_nodes_and_graphs, std::vector<NodeComputeInfo>& node_compute_funcs) = 0;

virtual int IExecutionProvider__GenerateMetaDefId(const IExecutionProvider* p, const onnxruntime::GraphViewer& graph_viewer, HashValue& model_hash) = 0;

// Status
virtual std::string Status__ToString(const Status* p) = 0;

@@ -1311,7 +1311,7 @@
}

TensorrtExecutionProvider::TensorrtExecutionProvider(const TensorrtExecutionProviderInfo& info)
: IExecutionProvider{onnxruntime::kTensorrtExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, info.device_id), true}, info_(info), device_id_(info.device_id) {
: IExecutionProvider{onnxruntime::kTensorrtExecutionProvider, OrtDevice(OrtDevice::GPU, OrtDevice::MemType::DEFAULT, info.device_id)}, info_(info), device_id_(info.device_id) {

[cpplint] onnxruntime/core/providers/tensorrt/tensorrt_execution_provider.cc:1314: Lines should be <= 120 characters long [whitespace/line_length] [2]
InitProviderOrtApi();

CUDA_CALL_THROW(cudaSetDevice(device_id_));