Add INT64 Datatype Support for Shape Tensors in TensorRT Backend (#91)
* Add INT64 datatype support for shape tensors
pskiran1 authored Jun 20, 2024
1 parent 1aec8bb commit 315293f
Showing 7 changed files with 392 additions and 92 deletions.
2 changes: 2 additions & 0 deletions CMakeLists.txt
@@ -141,6 +141,8 @@ add_library(
${SOURCE_DIR}/instance_state.cc
${SOURCE_DIR}/tensorrt_model_instance.cc
${SOURCE_DIR}/tensorrt_model_instance.h
${SOURCE_DIR}/shape_tensor.cc
${SOURCE_DIR}/shape_tensor.h
${SOURCE_DIR}/tensorrt_utils.cc
${SOURCE_DIR}/tensorrt_utils.h
${SOURCE_DIR}/filesystem.h
109 changes: 44 additions & 65 deletions src/instance_state.cc
@@ -500,7 +500,7 @@ ModelInstanceState::Run(
return;
}

std::map<int32_t, std::vector<int32_t>> request_shape_values;
std::map<int32_t, ShapeTensor> request_shape_values;
// Scheduler ensures all the requests have identical shape values so
// use values from first shape tensor
TRITONSERVER_Error* err = GetRequestShapeValues(
@@ -587,8 +587,7 @@ ModelInstanceState::Run(
if (it != request_shape_values.end()) {
err = ValidateShapeValues(
it->second, citr->second.min_shapes_[io_index],
citr->second.max_shapes_[io_index], citr->second.nb_shape_values_,
support_batching_);
citr->second.max_shapes_[io_index], citr->second.nb_shape_values_);
} else {
err = TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
@@ -607,8 +606,8 @@ ModelInstanceState::Run(
// [FIXME] formalize it, the 'buffer_' may be set directly while forming
// the shape value
memcpy(
io_binding_info.GetBuffer(), &(it->second[0]),
sizeof(int32_t) * it->second.size());
io_binding_info.GetBuffer(), it->second.GetData(),
it->second.GetSize());
citr->second.context_->setInputTensorAddress(
name.c_str(), io_binding_info.GetBuffer());
}
@@ -1304,7 +1303,7 @@ ModelInstanceState::ProcessResponse()
TRITONSERVER_Error*
ModelInstanceState::GetRequestShapeValues(
size_t total_batch_size, TRITONBACKEND_Request* request,
std::map<int, std::vector<int32_t>>* request_shape_values)
std::map<int, ShapeTensor>* request_shape_values)
{
// Visit all the inputs and extract the shape values present in the
// request
@@ -1325,12 +1324,6 @@ ModelInstanceState::GetRequestShapeValues(

int io_index = io_index_map_[input_name];
if (engine_->isShapeInferenceIO(input_name)) {
auto it =
request_shape_values->emplace(io_index, std::vector<int32_t>()).first;
if (support_batching_) {
it->second.push_back((int32_t)total_batch_size);
}

// For now being conservative and requiring that shape tensors
// be in a single buffer on the CPU. We can handle more cases in
// future if necessary.
@@ -1359,38 +1352,15 @@ ModelInstanceState::GetRequestShapeValues(
.c_str());
}

// FIXME DLIS-6653: With the support of INT64, shape tensors
// can also be of type INT64 and the assumptions that shape
// tensors might not always hold true.
// Assuming input shape tensors datatype as INT32.
int64_t element_cnt = backend::GetElementCount(shape, dims_count);
if (support_batching_) {
element_cnt /= shape[0];
}
const size_t expected_byte_size =
element_cnt * GetByteSize(TRITONSERVER_TYPE_INT32, {1});

bool includes_batch_shape_value = false;
if (expected_byte_size != data_byte_size) {
if (expected_byte_size == (data_byte_size - sizeof(int32_t))) {
includes_batch_shape_value = true;
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("shape tensor for input '") + input_name +
"' expected byte size is " + std::to_string(expected_byte_size) +
" [ or " + std::to_string(expected_byte_size + sizeof(int32_t)) +
" if input includes batch shape value] " + ", got " +
std::to_string(data_byte_size))
.c_str());
}
}

const int32_t* dims = reinterpret_cast<const int32_t*>(data_buffer);
int64_t offset = includes_batch_shape_value ? 1 : 0;
for (int64_t i = offset; i < element_cnt; ++i) {
it->second.push_back(dims[i]);
}
auto it = request_shape_values->emplace(io_index, ShapeTensor()).first;
RETURN_IF_ERROR(it->second.SetDataFromBuffer(
data_buffer, data_byte_size, datatype, element_cnt, input_name,
support_batching_, total_batch_size));
}
}

@@ -1401,7 +1371,7 @@ TRITONSERVER_Error*
ModelInstanceState::GetMostOptimizedProfile(
size_t total_batch_size, TRITONBACKEND_Request** requests,
uint32_t request_count,
const std::map<int, std::vector<int32_t>>& request_shape_values,
const std::map<int, ShapeTensor>& request_shape_values,
std::map<int, TensorRTContext>::iterator* citr)
{
// Returns the TensorRT context that uses profile with shortest
@@ -1452,7 +1422,7 @@ TRITONSERVER_Error*
ModelInstanceState::EvaluateTensorRTContext(
std::map<int, TensorRTContext>::iterator& citr, size_t total_batch_size,
TRITONBACKEND_Request** requests, uint32_t request_count,
const std::map<int, std::vector<int32_t>>& request_shape_values,
const std::map<int, ShapeTensor>& request_shape_values,
int64_t* error_distance)
{
*error_distance = 0;
@@ -1519,13 +1489,12 @@ ModelInstanceState::EvaluateTensorRTContext(
if (it != request_shape_values.end()) {
shape_err = ValidateShapeValues(
it->second, citr->second.min_shapes_[io_index],
citr->second.max_shapes_[io_index], citr->second.nb_shape_values_,
support_batching_);
valid_bs =
(!support_batching_) || (((int32_t)total_batch_size >=
*citr->second.min_shapes_[io_index]) &&
((int64_t)total_batch_size <=
*citr->second.max_shapes_[io_index]));
citr->second.max_shapes_[io_index],
citr->second.nb_shape_values_);
valid_bs = (!support_batching_) ||
ValidateBatchSize(
total_batch_size, citr->second.min_shapes_[io_index],
citr->second.max_shapes_[io_index]);
} else {
missing_shape_values = true;
}
@@ -1549,14 +1518,9 @@ ModelInstanceState::EvaluateTensorRTContext(
std::abs(opt_dims.d[idx] - input_shape_vec[idx - 1]);
}
if (engine_->isShapeInferenceIO(input_name)) {
const auto* opt_shape_values = citr->second.opt_shapes_[io_index];
*error_distance +=
std::abs(*opt_shape_values - (int64_t)total_batch_size);
auto it = request_shape_values.find(io_index);
for (size_t idx = 1; idx < citr->second.nb_shape_values_; idx++) {
*error_distance +=
std::abs(*(opt_shape_values + idx) - it->second[idx - 1]);
}
*error_distance += it->second.GetDistance(
citr->second.opt_shapes_[io_index], total_batch_size);
}
}
}
@@ -2996,13 +2960,14 @@ ModelInstanceState::InitializeShapeInputBinding(
return nullptr;
}

if (input_datatype != TRITONSERVER_TYPE_INT32) {
if ((input_datatype != TRITONSERVER_TYPE_INT32) &&
(input_datatype != TRITONSERVER_TYPE_INT64)) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("unexpected datatype TYPE_") +
TRITONSERVER_DataTypeString(input_datatype) +
" in model configuration for shape input '" + input_name +
"', expecting TYPE_INT32 for " + Name())
" in model configuration for shape input '" + input_name +
"', expecting TYPE_INT32 or TYPE_INT64 for " + Name())
.c_str());
}

@@ -3042,18 +3007,32 @@ ModelInstanceState::InitializeShapeInputBinding(
context.nb_shape_values_ = (context.max_dims_[io_index].nbDims == 0)
? 1
: context.max_dims_[io_index].d[0];
context.max_shapes_[io_index] = engine_->getProfileTensorValues(
input_name.c_str(), profile_index, nvinfer1::OptProfileSelector::kMAX);
context.min_shapes_[io_index] = engine_->getProfileTensorValues(
input_name.c_str(), profile_index, nvinfer1::OptProfileSelector::kMIN);
context.opt_shapes_[io_index] = engine_->getProfileTensorValues(
input_name.c_str(), profile_index, nvinfer1::OptProfileSelector::kOPT);
context.max_shapes_[io_index] = ShapeTensor();
context.max_shapes_[io_index].SetDataFromShapeValues(
engine_->getProfileTensorValues(
input_name.c_str(), profile_index,
nvinfer1::OptProfileSelector::kMAX),
input_datatype, context.nb_shape_values_);

context.min_shapes_[io_index] = ShapeTensor();
context.min_shapes_[io_index].SetDataFromShapeValues(
engine_->getProfileTensorValues(
input_name.c_str(), profile_index,
nvinfer1::OptProfileSelector::kMIN),
input_datatype, context.nb_shape_values_);

context.opt_shapes_[io_index] = ShapeTensor();
context.opt_shapes_[io_index].SetDataFromShapeValues(
engine_->getProfileTensorValues(
input_name.c_str(), profile_index,
nvinfer1::OptProfileSelector::kOPT),
input_datatype, context.nb_shape_values_);

// Set shape tensor address to buffer that contains max allowed value so
// later shape inference will return max output shape / size for
// pre-allocation.
if (!context.context_->setInputTensorAddress(
input_name.c_str(), context.max_shapes_[io_index])) {
input_name.c_str(), context.max_shapes_[io_index].GetData())) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
(std::string("trt failed to set the input shape binding for '") +
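Note: the new ShapeTensor helper added in src/shape_tensor.h and src/shape_tensor.cc is not expanded on this page, so it is visible only through its call sites above (SetDataFromBuffer, SetDataFromShapeValues, GetData, GetSize, GetDistance). The following is a minimal, self-contained sketch of what such a wrapper could look like, inferred purely from those call sites; the class name, signatures, and behavior below are assumptions, not the actual implementation.

// Hypothetical sketch only -- the real class lives in src/shape_tensor.h/.cc,
// which this page does not expand. Inferred from the call sites above.
#include <cstddef>
#include <cstdint>
#include <vector>

enum class ShapeDType { kINT32, kINT64 };

class ShapeTensorSketch {
 public:
  // Copy shape values out of a request buffer, keeping them in their native
  // width (INT32 or INT64) and optionally prepending the batch size.
  bool SetDataFromBuffer(
      const char* data_buffer, size_t data_byte_size, ShapeDType datatype,
      size_t element_cnt, bool support_batching, size_t total_batch_size)
  {
    const size_t elem_size =
        (datatype == ShapeDType::kINT64) ? sizeof(int64_t) : sizeof(int32_t);
    if (data_byte_size < element_cnt * elem_size) {
      return false;  // request buffer smaller than the expected shape values
    }
    datatype_ = datatype;
    buffer_.clear();
    if (support_batching) {
      // The batch size becomes the first shape value, in the tensor's own width.
      if (datatype == ShapeDType::kINT64) {
        const int64_t bs = static_cast<int64_t>(total_batch_size);
        Append(&bs, sizeof(bs));
      } else {
        const int32_t bs = static_cast<int32_t>(total_batch_size);
        Append(&bs, sizeof(bs));
      }
    }
    Append(data_buffer, element_cnt * elem_size);
    return true;
  }

  // Pointer and byte count later memcpy'd into the TensorRT input binding.
  const void* GetData() const { return buffer_.data(); }
  size_t GetSize() const { return buffer_.size(); }

 private:
  void Append(const void* src, size_t nbytes)
  {
    const char* p = static_cast<const char*>(src);
    buffer_.insert(buffer_.end(), p, p + nbytes);
  }

  ShapeDType datatype_{ShapeDType::kINT32};
  std::vector<char> buffer_;  // shape values stored in their native width
};

Storing the values at their native element width (4 bytes for INT32, 8 bytes for INT64) is what lets the memcpy in Run() stay datatype-agnostic: it simply copies GetSize() bytes from GetData() into the binding buffer. The removed code in GetRequestShapeValues also tolerated request buffers whose first element already carried the batch value; the real helper presumably keeps that tolerance, which this sketch omits.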
13 changes: 7 additions & 6 deletions src/instance_state.h
@@ -35,6 +35,7 @@
#include "io_binding_info.h"
#include "model_state.h"
#include "semaphore.h"
#include "shape_tensor.h"
#include "tensorrt_model_instance.h"
#include "triton/backend/backend_input_collector.h"
#include "triton/backend/backend_output_responder.h"
@@ -136,13 +137,13 @@ struct TensorRTContext {
std::vector<nvinfer1::Dims> opt_dims_{};

// Min shape values per bindings
std::vector<const int32_t*> min_shapes_{};
std::vector<ShapeTensor> min_shapes_{};

// Max shape values per bindings
std::vector<const int32_t*> max_shapes_{};
std::vector<ShapeTensor> max_shapes_{};

// Optimized shape values per bindings
std::vector<const int32_t*> opt_shapes_{};
std::vector<ShapeTensor> opt_shapes_{};

// The number of shape values
size_t nb_shape_values_{0};
@@ -333,16 +334,16 @@ class ModelInstanceState : public TensorRTModelInstance {

TRITONSERVER_Error* GetRequestShapeValues(
size_t total_batch_size, TRITONBACKEND_Request* request,
std::map<int, std::vector<int32_t>>* request_shape_values);
std::map<int, ShapeTensor>* request_shape_values);
TRITONSERVER_Error* GetMostOptimizedProfile(
size_t total_batch_size, TRITONBACKEND_Request** requests,
uint32_t request_count,
const std::map<int, std::vector<int32_t>>& request_shape_values,
const std::map<int, ShapeTensor>& request_shape_values,
std::map<int, TensorRTContext>::iterator* citr);
TRITONSERVER_Error* EvaluateTensorRTContext(
std::map<int, TensorRTContext>::iterator& citr, size_t total_batch_size,
TRITONBACKEND_Request** requests, uint32_t request_count,
const std::map<int, std::vector<int32_t>>& request_shape_values,
const std::map<int, ShapeTensor>& request_shape_values,
int64_t* error_distance);

bool SetOutputShapeTensorBuffer(
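The min_shapes_/max_shapes_/opt_shapes_ members above previously held the raw const int32_t* pointers returned by engine_->getProfileTensorValues(); they now own ShapeTensor objects populated in InitializeShapeInputBinding via SetDataFromShapeValues. Below is a hedged sketch of the conversion that implies, assuming the helper widens the profile values to 64-bit storage when the shape input is declared TYPE_INT64; the function name and signature are illustrative only, not the real API.

// Hypothetical conversion implied by SetDataFromShapeValues: TensorRT's
// getProfileTensorValues() returns int32_t values, but a TYPE_INT64 shape
// input needs 64-bit elements so that GetData()/GetSize() match the binding
// datatype. The real implementation is in src/shape_tensor.cc (not shown).
#include <cstddef>
#include <cstdint>
#include <vector>

std::vector<char> ProfileValuesToShapeBuffer(
    const int32_t* profile_values,  // e.g. the result of getProfileTensorValues()
    size_t nb_shape_values, bool as_int64)
{
  std::vector<char> buffer;
  if (as_int64) {
    // Widen each 32-bit profile value to 64 bits before storing its bytes.
    for (size_t i = 0; i < nb_shape_values; ++i) {
      const int64_t v = static_cast<int64_t>(profile_values[i]);
      const char* p = reinterpret_cast<const char*>(&v);
      buffer.insert(buffer.end(), p, p + sizeof(v));
    }
  } else {
    // INT32 shape inputs can keep the profile values as-is.
    const char* p = reinterpret_cast<const char*>(profile_values);
    buffer.assign(p, p + nb_shape_values * sizeof(int32_t));
  }
  return buffer;
}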
(Diffs for the remaining changed files, including the new src/shape_tensor.cc and src/shape_tensor.h, are not expanded on this page.)
