-
Notifications
You must be signed in to change notification settings - Fork 30
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Enable Cuda in Graphics Implementation for TensorRT backend #100
base: main
Are you sure you want to change the base?
Changes from 7 commits
134bf33
7353671
ed1296d
05e3786
9ae5a09
89ab580
b624b98
1f1ae7e
b42a8d6
d9aff26
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -257,7 +257,13 @@ ModelInstanceState::ModelInstanceState( | |
|
||
ModelInstanceState::~ModelInstanceState() | ||
{ | ||
cudaSetDevice(DeviceId()); | ||
#ifdef TRITON_ENABLE_CIG | ||
// Set device if CiG is disabled | ||
if (!model_state_->isCiGEnabled()) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
cudaSetDevice(DeviceId()); | ||
} | ||
for (auto& io_binding_infos : io_binding_infos_) { | ||
for (auto& io_binding_info : io_binding_infos) { | ||
if (!io_binding_info.IsDynamicShapeOutput() && | ||
|
@@ -424,7 +430,13 @@ ModelInstanceState::Run( | |
payload_.reset(new Payload(next_set_, requests, request_count)); | ||
SET_TIMESTAMP(payload_->compute_start_ns_); | ||
|
||
cudaSetDevice(DeviceId()); | ||
#ifdef TRITON_ENABLE_CIG | ||
// Set device if CiG is disabled | ||
if (!model_state_->isCiGEnabled()) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
cudaSetDevice(DeviceId()); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Do you mind sharing the reasoning for avoiding the set-device calls? Wouldn't that cause the issue of the model not being placed / executed on the selected device (based on the model config)? There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more.
|
||
} | ||
#ifdef TRITON_ENABLE_STATS | ||
{ | ||
SET_TIMESTAMP(payload_->compute_start_ns_); | ||
|
@@ -1551,13 +1563,20 @@ ModelInstanceState::EvaluateTensorRTContext( | |
TRITONSERVER_Error* | ||
ModelInstanceState::InitStreamsAndEvents() | ||
{ | ||
// Set the device before preparing the context. | ||
auto cuerr = cudaSetDevice(DeviceId()); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, (std::string("unable to set device for ") + | ||
Name() + ": " + cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
#ifdef TRITON_ENABLE_CIG | ||
// Set device if CiG is disabled | ||
if (!model_state_->isCiGEnabled()) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
// Set the device before preparing the context. | ||
auto cuerr = cudaSetDevice(DeviceId()); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to set device for ") + Name() + ": " + | ||
cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
} | ||
} | ||
|
||
// Create CUDA streams associated with the instance | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -175,7 +175,13 @@ ModelState::ModelState(TRITONBACKEND_Model* triton_model) | |
ModelState::~ModelState() | ||
{ | ||
for (auto& device_engine : device_engines_) { | ||
cudaSetDevice(device_engine.first.first); | ||
#ifdef TRITON_ENABLE_CIG | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I know I had asked about looking at macros to enable this, but I would like to avoid this kind of guard - if we can use a single method and then have two different implementations of that method / object, I would prefer that to having the macros embedded in the functions / methods. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Fixed |
||
// Set device if CiG is disabled | ||
if (!isCiGEnabled()) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
cudaSetDevice(device_engine.first.first); | ||
} | ||
auto& runtime = device_engine.second.first; | ||
auto& engine = device_engine.second.second; | ||
// Need to reset explicitly to ensure proper destruction order | ||
|
@@ -209,15 +215,20 @@ ModelState::CreateEngine( | |
// We share the engine (for models that don't have dynamic shapes) and | ||
// runtime across instances that have access to the same GPU/NVDLA. | ||
if (eit->second.second == nullptr) { | ||
auto cuerr = cudaSetDevice(gpu_device); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to set device for ") + Name() + ": " + | ||
cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
#ifdef TRITON_ENABLE_CIG | ||
// Set device if CiG is disabled | ||
if (!isCiGEnabled()) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
auto cuerr = cudaSetDevice(gpu_device); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to set device for ") + Name() + ": " + | ||
cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
} | ||
} | ||
|
||
const bool new_runtime = (eit->second.first == nullptr); | ||
RETURN_IF_ERROR(LoadPlan( | ||
model_path, dla_core_id, &eit->second.first, &eit->second.second, | ||
|
@@ -321,13 +332,19 @@ ModelState::AutoCompleteConfig() | |
" to auto-complete config for " + Name()) | ||
.c_str())); | ||
|
||
cuerr = cudaSetDevice(device_id); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to set CUDA device to GPU ") + | ||
std::to_string(device_id) + " : " + cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
#ifdef TRITON_ENABLE_CIG | ||
// Set device if CiG is disabled | ||
if (!isCiGEnabled()) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
cuerr = cudaSetDevice(device_id); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to set CUDA device to GPU ") + | ||
std::to_string(device_id) + " : " + cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
} | ||
} | ||
|
||
std::string artifact_name; | ||
|
@@ -373,13 +390,19 @@ ModelState::AutoCompleteConfig() | |
|
||
RETURN_IF_ERROR(AutoCompleteConfigHelper(model_path)); | ||
|
||
cuerr = cudaSetDevice(current_device); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to revert CUDA device to GPU ") + | ||
std::to_string(current_device) + " : " + cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
#ifdef TRITON_ENABLE_CIG | ||
// Set device if CiG is disabled | ||
if (!isCiGEnabled()) | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @tanmayv25, @ashishk98 - is there a way to have a single scoped object
that internally checks if there is an application context? We don't currently use them in the same locations - but am wondering if that would be possible - I think it would be cleaner logically - where basically an 'application_context' takes the place of the 'device' but otherwise the logic remains the same.
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. We can take a look at this in the next iteration |
||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
cuerr = cudaSetDevice(current_device); | ||
if (cuerr != cudaSuccess) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to revert CUDA device to GPU ") + | ||
std::to_string(current_device) + " : " + cudaGetErrorString(cuerr)) | ||
.c_str()); | ||
} | ||
} | ||
|
||
if (TRITONSERVER_LogIsEnabled(TRITONSERVER_LOG_VERBOSE)) { | ||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -26,6 +26,8 @@ | |
|
||
#include "tensorrt_model.h" | ||
|
||
#include <sstream> | ||
|
||
namespace triton { namespace backend { namespace tensorrt { | ||
|
||
TensorRTModel::Priority | ||
|
@@ -54,6 +56,10 @@ TensorRTModel::TensorRTModel(TRITONBACKEND_Model* triton_model) | |
use_cuda_graphs_(false), gather_kernel_buffer_threshold_(0), | ||
separate_output_stream_(false), eager_batching_(false), | ||
busy_wait_events_(false) | ||
#ifdef TRITON_ENABLE_CIG | ||
, | ||
cig_ctx_(nullptr) | ||
#endif // TRITON_ENABLE_CIG | ||
{ | ||
ParseModelConfig(); | ||
} | ||
|
@@ -90,6 +96,23 @@ TensorRTModel::ParseModelConfig() | |
} | ||
} | ||
|
||
#ifdef TRITON_ENABLE_CIG | ||
triton::common::TritonJson::Value parameters; | ||
if (model_config_.Find("parameters", ¶meters)) { | ||
triton::common::TritonJson::Value value; | ||
std::string ptr_value; | ||
if (parameters.Find("CIG_CONTEXT_PTR", &value)) { | ||
RETURN_IF_ERROR(value.MemberAsString("string_value", &ptr_value)); | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ashishk98 instead of directly converting here as a special case, I would prefer to use something similar to what is done in the trt-llm backend: in this case there is a template method to convert from a parameter to a value - I think the code will be a little clearer to follow. Also - can we convert to and from a 64-bit integer? So something like:
Also it strikes me that although we use MemberAsString, we could also use MemberAsUint. So two things - 1) add a templated GetParameter() method, 2) we can use MemberAsUint for the uint64 template, and 3) officially transfer uint64 values and convert them to and from the context. There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. I have added a GetParameter call for std::string instead of UINT64. This is because when we add the parameter to the model config it is directly converted into a hex string instead of a numeric string. Hence, while parsing the pointer, MemberAsUint fails because it gets a hex string to parse. |
||
std::stringstream ss; | ||
ss << ptr_value; | ||
void* ctx_ptr; | ||
ss >> ctx_ptr; | ||
cig_ctx_ = static_cast<CUcontext>(ctx_ptr); | ||
LOG_MESSAGE(TRITONSERVER_LOG_VERBOSE, "CiG Context pointer is set"); | ||
} | ||
} | ||
#endif // TRITON_ENABLE_CIG | ||
|
||
return nullptr; // Success | ||
} | ||
|
||
|
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -25,6 +25,10 @@ | |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
#pragma once | ||
|
||
#ifdef TRITON_ENABLE_CIG | ||
#include <cuda.h> | ||
#endif // TRITON_ENABLE_CIG | ||
|
||
#include "triton/backend/backend_model.h" | ||
|
||
namespace triton { namespace backend { namespace tensorrt { | ||
|
@@ -53,6 +57,41 @@ class TensorRTModel : public BackendModel { | |
bool EagerBatching() { return eager_batching_; } | ||
bool BusyWaitEvents() { return busy_wait_events_; } | ||
|
||
#ifdef TRITON_ENABLE_CIG | ||
//! Following functions are related to CiG (Cuda in Graphics) context sharing | ||
//! for gaming use case. Creating a shared contexts reduces context switching | ||
//! overhead and leads to better performance of model execution along side | ||
//! Graphics workload. | ||
CUcontext GetCiGContext() { return cig_ctx_; } | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. @ashishk98 question: is this specific to CiG - or could it be applied to any application-provided CUDA context? |
||
bool isCiGEnabled() { return cig_ctx_ != nullptr; } | ||
|
||
inline TRITONSERVER_Error* PushCiGContext() | ||
{ | ||
if (CUDA_SUCCESS != cuCtxPushCurrent(cig_ctx_)) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to push CiG context for ") + Name()).c_str()); | ||
} | ||
return nullptr; | ||
} | ||
|
||
inline TRITONSERVER_Error* PopCiGContext() | ||
{ | ||
CUcontext oldCtx{}; | ||
if (CUDA_SUCCESS != cuCtxPopCurrent(&oldCtx)) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("unable to [pop CiG context for ") + Name()).c_str()); | ||
} | ||
if (oldCtx != cig_ctx_) { | ||
return TRITONSERVER_ErrorNew( | ||
TRITONSERVER_ERROR_INTERNAL, | ||
(std::string("popping the wrong CiG context for ") + Name()).c_str()); | ||
} | ||
return nullptr; | ||
} | ||
#endif // TRITON_ENABLE_CIG | ||
|
||
protected: | ||
common::TritonJson::Value graph_specs_; | ||
Priority priority_; | ||
|
@@ -61,6 +100,28 @@ class TensorRTModel : public BackendModel { | |
bool separate_output_stream_; | ||
bool eager_batching_; | ||
bool busy_wait_events_; | ||
#ifdef TRITON_ENABLE_CIG | ||
CUcontext cig_ctx_; | ||
#endif // TRITON_ENABLE_CIG | ||
}; | ||
|
||
#ifdef TRITON_ENABLE_CIG | ||
struct ScopedRuntimeCiGContext { | ||
ScopedRuntimeCiGContext(TensorRTModel* model_state) | ||
: model_state_(model_state) | ||
{ | ||
if (model_state_->isCiGEnabled()) { | ||
THROW_IF_BACKEND_MODEL_ERROR(model_state_->PushCiGContext()); | ||
} | ||
} | ||
~ScopedRuntimeCiGContext() | ||
{ | ||
if (model_state_->isCiGEnabled()) { | ||
THROW_IF_BACKEND_MODEL_ERROR(model_state_->PopCiGContext()); | ||
} | ||
} | ||
TensorRTModel* model_state_; | ||
}; | ||
#endif // TRITON_ENABLE_CIG | ||
|
||
}}} // namespace triton::backend::tensorrt |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
These settings could be achieved with a generator expression, couldn't they?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
What is a generator expression?