Add INT64 Datatype Support for Shape Tensors in TensorRT Backend #91

Merged on Jun 20, 2024 (10 commits)
src/instance_state.cc: 227 changes (167 additions, 60 deletions)
@@ -500,7 +500,7 @@ ModelInstanceState::Run(
return;
}

std::map<int32_t, std::vector<int32_t>> request_shape_values;
std::map<int32_t, ShapeTensor> request_shape_values;
// Scheduler ensures all the requests have identical shape values so
// use values from first shape tensor
TRITONSERVER_Error* err = GetRequestShapeValues(
@@ -587,8 +587,7 @@ ModelInstanceState::Run(
if (it != request_shape_values.end()) {
err = ValidateShapeValues(
it->second, citr->second.min_shapes_[io_index],
citr->second.max_shapes_[io_index], citr->second.nb_shape_values_,
support_batching_);
citr->second.max_shapes_[io_index], citr->second.nb_shape_values_);
} else {
err = TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
@@ -607,8 +606,8 @@ ModelInstanceState::Run(
// [FIXME] formalize it, the 'buffer_' may be set directly while forming
// the shape value
memcpy(
io_binding_info.GetBuffer(), &(it->second[0]),
sizeof(int32_t) * it->second.size());
io_binding_info.GetBuffer(), it->second.GetData(),
it->second.GetSize());
citr->second.context_->setInputTensorAddress(
name.c_str(), io_binding_info.GetBuffer());
}
@@ -1304,7 +1303,7 @@ ModelInstanceState::ProcessResponse()
TRITONSERVER_Error*
ModelInstanceState::GetRequestShapeValues(
size_t total_batch_size, TRITONBACKEND_Request* request,
std::map<int, std::vector<int32_t>>* request_shape_values)
std::map<int, ShapeTensor>* request_shape_values)
{
// Visit all the inputs and extract the shape values present in the
// request
@@ -1325,12 +1324,6 @@ ModelInstanceState::GetRequestShapeValues(

int io_index = io_index_map_[input_name];
if (engine_->isShapeInferenceIO(input_name)) {
auto it =
request_shape_values->emplace(io_index, std::vector<int32_t>()).first;
if (support_batching_) {
it->second.push_back((int32_t)total_batch_size);
}

// For now being conservative and requiring that shape tensors
// be in a single buffer on the CPU. We can handle more cases in
// future if necessary.
@@ -1359,38 +1352,39 @@
.c_str());
}

// FIXME DLIS-6653: With the support of INT64, shape tensors
// can also be of type INT64, so the assumption that shape
// tensors are always INT32 might not hold true.
// Assuming input shape tensor datatype is INT32.
int64_t element_cnt = backend::GetElementCount(shape, dims_count);
if (support_batching_) {
element_cnt /= shape[0];
}
const size_t expected_byte_size =
element_cnt * GetByteSize(TRITONSERVER_TYPE_INT32, {1});

bool includes_batch_shape_value = false;
if (expected_byte_size != data_byte_size) {
if (expected_byte_size == (data_byte_size - sizeof(int32_t))) {
includes_batch_shape_value = true;
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("shape tensor for input '") + input_name +
"' expected byte size is " + std::to_string(expected_byte_size) +
" [ or " + std::to_string(expected_byte_size + sizeof(int32_t)) +
" if input includes batch shape value] " + ", got " +
std::to_string(data_byte_size))
.c_str());
}
size_t datatype_size;
if (datatype == TRITONSERVER_DataType::TRITONSERVER_TYPE_INT32) {
datatype_size = TRITONSERVER_DataTypeByteSize(TRITONSERVER_TYPE_INT32);
} else if (datatype == TRITONSERVER_DataType::TRITONSERVER_TYPE_INT64) {
datatype_size = TRITONSERVER_DataTypeByteSize(TRITONSERVER_TYPE_INT64);
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
"Unsupported shape tensor data type");
}
const size_t expected_byte_size = element_cnt * datatype_size;

const int32_t* dims = reinterpret_cast<const int32_t*>(data_buffer);
int64_t offset = includes_batch_shape_value ? 1 : 0;
for (int64_t i = offset; i < element_cnt; ++i) {
it->second.push_back(dims[i]);
if ((expected_byte_size != data_byte_size) &&
(expected_byte_size != (data_byte_size - datatype_size))) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("shape tensor for input '") + input_name +
"' expected byte size is " + std::to_string(expected_byte_size) +
" [ or " + std::to_string(expected_byte_size + datatype_size) +
" if input includes batch shape value] " + ", got " +
std::to_string(data_byte_size))
.c_str());
}

auto it = request_shape_values->emplace(io_index, ShapeTensor()).first;
RETURN_IF_ERROR(it->second.SetDataFromBuffer(
data_buffer, datatype, element_cnt, support_batching_,
total_batch_size));
}
}
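
To make the byte-size arithmetic above concrete, here is a minimal, self-contained sketch of the same check as a hypothetical helper (not backend code): a shape tensor may legally carry either element_cnt elements or one extra, depending on whether the leading batch shape value is included in the buffer.

// Standalone sketch of the expected-byte-size check above (hypothetical
// helper for illustration only).
#include <cstddef>
#include <cstdint>
#include <iostream>

bool
ShapeTensorByteSizeOk(
    size_t element_cnt, size_t datatype_size, size_t data_byte_size)
{
  const size_t expected_byte_size = element_cnt * datatype_size;
  // Accept one extra leading element when the request includes the batch
  // shape value.
  return (data_byte_size == expected_byte_size) ||
         (data_byte_size == expected_byte_size + datatype_size);
}

int
main()
{
  std::cout << ShapeTensorByteSizeOk(3, sizeof(int64_t), 24) << "\n";  // 1: exact
  std::cout << ShapeTensorByteSizeOk(3, sizeof(int64_t), 32) << "\n";  // 1: with batch value
  std::cout << ShapeTensorByteSizeOk(3, sizeof(int32_t), 24) << "\n";  // 0: INT32 expects 12 or 16
}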

@@ -1401,7 +1395,7 @@ TRITONSERVER_Error*
ModelInstanceState::GetMostOptimizedProfile(
size_t total_batch_size, TRITONBACKEND_Request** requests,
uint32_t request_count,
const std::map<int, std::vector<int32_t>>& request_shape_values,
const std::map<int, ShapeTensor>& request_shape_values,
std::map<int, TensorRTContext>::iterator* citr)
{
// Returns the TensorRT context that uses profile with shortest
@@ -1452,7 +1446,7 @@ TRITONSERVER_Error*
ModelInstanceState::EvaluateTensorRTContext(
std::map<int, TensorRTContext>::iterator& citr, size_t total_batch_size,
TRITONBACKEND_Request** requests, uint32_t request_count,
const std::map<int, std::vector<int32_t>>& request_shape_values,
const std::map<int, ShapeTensor>& request_shape_values,
int64_t* error_distance)
{
*error_distance = 0;
@@ -1519,13 +1513,12 @@ ModelInstanceState::EvaluateTensorRTContext(
if (it != request_shape_values.end()) {
shape_err = ValidateShapeValues(
it->second, citr->second.min_shapes_[io_index],
citr->second.max_shapes_[io_index], citr->second.nb_shape_values_,
support_batching_);
valid_bs =
(!support_batching_) || (((int32_t)total_batch_size >=
*citr->second.min_shapes_[io_index]) &&
((int64_t)total_batch_size <=
*citr->second.max_shapes_[io_index]));
citr->second.max_shapes_[io_index],
citr->second.nb_shape_values_);
valid_bs = (!support_batching_) ||
ValidateBatchSize(
total_batch_size, citr->second.min_shapes_[io_index],
citr->second.max_shapes_[io_index]);
} else {
missing_shape_values = true;
}
@@ -1549,13 +1542,29 @@
std::abs(opt_dims.d[idx] - input_shape_vec[idx - 1]);
}
if (engine_->isShapeInferenceIO(input_name)) {
const auto* opt_shape_values = citr->second.opt_shapes_[io_index];
*error_distance +=
std::abs(*opt_shape_values - (int64_t)total_batch_size);
auto it = request_shape_values.find(io_index);
if (it->second.GetDataType() == ShapeTensorDataType::INT32) {
Contributor: I think the duplicate code can be reduced here. How about adding a function to the ShapeTensor class, called GetDistance, that returns the distance between this shape tensor and the one passed in as an argument? It can take care of the data type handling.

Member (Author): Moved this code to ShapeTensor::GetDistance() method.

const auto* shape_values =
reinterpret_cast<const int32_t*>(it->second.GetData());
const auto* opt_shape_values = reinterpret_cast<const int32_t*>(
citr->second.opt_shapes_[io_index].GetData());
*error_distance +=
std::abs(*opt_shape_values - (int64_t)total_batch_size);
for (size_t idx = 1; idx < citr->second.nb_shape_values_; idx++) {
*error_distance +=
std::abs(*(opt_shape_values + idx) - shape_values[idx - 1]);
}
} else {
const auto* shape_values =
reinterpret_cast<const int64_t*>(it->second.GetData());
const auto* opt_shape_values = reinterpret_cast<const int64_t*>(
citr->second.opt_shapes_[io_index].GetData());
*error_distance +=
std::abs(*opt_shape_values - (int64_t)total_batch_size);
for (size_t idx = 1; idx < citr->second.nb_shape_values_; idx++) {
*error_distance +=
std::abs(*(opt_shape_values + idx) - shape_values[idx - 1]);
}
}
}
}
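
The review thread above suggests folding the duplicated INT32/INT64 branches into a ShapeTensor::GetDistance() method, and the author reports doing so; the method body is not shown in this hunk. The following is a plausible sketch under the assumptions that GetData() returns a raw const char* byte pointer and element_cnt_ holds the number of shape values including the batch value (assumes <cstdlib> for std::abs).

// Hypothetical sketch of ShapeTensor::GetDistance(); the actual
// implementation lives in the ShapeTensor class and is not shown in this
// diff. 'this' is the profile's opt-shape tensor, 'other' holds the
// request's shape values.
template <typename T>
static int64_t
DistanceImpl(
    const char* opt_data, const char* req_data, const size_t nb_shape_values,
    const int64_t total_batch_size)
{
  const T* opt = reinterpret_cast<const T*>(opt_data);
  const T* req = reinterpret_cast<const T*>(req_data);
  // The first opt value is compared against the batch size, the rest
  // against the request's shape values, mirroring the branches above.
  int64_t distance = std::abs(static_cast<int64_t>(*opt) - total_batch_size);
  for (size_t idx = 1; idx < nb_shape_values; idx++) {
    distance += std::abs(
        static_cast<int64_t>(opt[idx]) - static_cast<int64_t>(req[idx - 1]));
  }
  return distance;
}

int64_t
ShapeTensor::GetDistance(
    const ShapeTensor& other, const size_t total_batch_size) const
{
  if (datatype_ == ShapeTensorDataType::INT32) {
    return DistanceImpl<int32_t>(
        GetData(), other.GetData(), element_cnt_, total_batch_size);
  }
  return DistanceImpl<int64_t>(
      GetData(), other.GetData(), element_cnt_, total_batch_size);
}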
@@ -2996,13 +3005,14 @@ ModelInstanceState::InitializeShapeInputBinding(
return nullptr;
}

if (input_datatype != TRITONSERVER_TYPE_INT32) {
if ((input_datatype != TRITONSERVER_TYPE_INT32) &&
(input_datatype != TRITONSERVER_TYPE_INT64)) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("unexpected datatype TYPE_") +
TRITONSERVER_DataTypeString(input_datatype) +
" in model configuration for shape input '" + input_name +
"', expecting TYPE_INT32 for " + Name())
" in model configuration for shape input '" + input_name +
"', expecting TYPE_INT32 or TYPE_INT64 for " + Name())
.c_str());
}

@@ -3042,18 +3052,32 @@ ModelInstanceState::InitializeShapeInputBinding(
context.nb_shape_values_ = (context.max_dims_[io_index].nbDims == 0)
? 1
: context.max_dims_[io_index].d[0];
context.max_shapes_[io_index] = engine_->getProfileTensorValues(
input_name.c_str(), profile_index, nvinfer1::OptProfileSelector::kMAX);
context.min_shapes_[io_index] = engine_->getProfileTensorValues(
input_name.c_str(), profile_index, nvinfer1::OptProfileSelector::kMIN);
context.opt_shapes_[io_index] = engine_->getProfileTensorValues(
input_name.c_str(), profile_index, nvinfer1::OptProfileSelector::kOPT);
context.max_shapes_[io_index] = ShapeTensor();
context.max_shapes_[io_index].SetDataFromShapeValues(
engine_->getProfileTensorValues(
input_name.c_str(), profile_index,
nvinfer1::OptProfileSelector::kMAX),
input_datatype, context.nb_shape_values_);

context.min_shapes_[io_index] = ShapeTensor();
context.min_shapes_[io_index].SetDataFromShapeValues(
engine_->getProfileTensorValues(
input_name.c_str(), profile_index,
nvinfer1::OptProfileSelector::kMIN),
input_datatype, context.nb_shape_values_);

context.opt_shapes_[io_index] = ShapeTensor();
context.opt_shapes_[io_index].SetDataFromShapeValues(
engine_->getProfileTensorValues(
input_name.c_str(), profile_index,
nvinfer1::OptProfileSelector::kOPT),
input_datatype, context.nb_shape_values_);

// Set shape tensor address to buffer that contains max allowed value so
// later shape inference will return max output shape / size for
// pre-allocation.
if (!context.context_->setInputTensorAddress(
input_name.c_str(), context.max_shapes_[io_index])) {
input_name.c_str(), context.max_shapes_[io_index].GetData())) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INTERNAL,
(std::string("trt failed to set the input shape binding for '") +
@@ -3835,4 +3859,87 @@ TRTv3Interface::SetBindingDimensions(

return nullptr;
}

TRITONSERVER_Error*
ShapeTensor::SetDataFromBuffer(
const char* data, const TRITONSERVER_DataType datatype,
const size_t element_cnt, const bool support_batching,
const size_t total_batch_size)
{
if (data == nullptr) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
"Null data pointer received for Shape tensor");
}

element_cnt_ = element_cnt;
size_t datatype_size;

if (datatype == TRITONSERVER_DataType::TRITONSERVER_TYPE_INT32) {
datatype_size = sizeof(int32_t);
datatype_ = ShapeTensorDataType::INT32;
} else if (datatype == TRITONSERVER_DataType::TRITONSERVER_TYPE_INT64) {
datatype_size = sizeof(int64_t);
datatype_ = ShapeTensorDataType::INT64;
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
"Unsupported data type received for Shape tensor");
}

if (support_batching) {
element_cnt_++; // Account for batch size
size_ = element_cnt_ * datatype_size;
data_.reset(new char[size_]);

if (datatype_ == ShapeTensorDataType::INT32) {
*reinterpret_cast<int32_t*>(data_.get()) =
static_cast<int32_t>(total_batch_size);
} else if (datatype_ == ShapeTensorDataType::INT64) {
*reinterpret_cast<int64_t*>(data_.get()) =
static_cast<int64_t>(total_batch_size);
}
std::memcpy(data_.get() + datatype_size, data, (size_ - datatype_size));
} else {
size_ = element_cnt_ * datatype_size;
data_.reset(new char[size_]);
std::memcpy(data_.get(), data, size_);
}

return nullptr;
}
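
A hypothetical usage sketch of SetDataFromBuffer() follows; the request values and sizes are invented for illustration. With batching enabled, the stored tensor gains a leading element holding the batch size.

// Hypothetical call site, assuming an INT64 request carrying two shape
// values {128, 256} and a total batch size of 4.
ShapeTensor st;
const int64_t request_values[2] = {128, 256};
TRITONSERVER_Error* err = st.SetDataFromBuffer(
    reinterpret_cast<const char*>(request_values), TRITONSERVER_TYPE_INT64,
    2 /* element_cnt */, true /* support_batching */,
    4 /* total_batch_size */);
// On success: err == nullptr, st.GetSize() == 3 * sizeof(int64_t), and the
// buffer holds {4, 128, 256} as int64_t.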

TRITONSERVER_Error*
ShapeTensor::SetDataFromShapeValues(
const int32_t* shape_values, const TRITONSERVER_DataType datatype,
const size_t element_cnt)
{
element_cnt_ = element_cnt;
size_t datatype_size;

if (datatype == TRITONSERVER_DataType::TRITONSERVER_TYPE_INT32) {
datatype_size = sizeof(int32_t);
datatype_ = ShapeTensorDataType::INT32;
size_ = element_cnt_ * datatype_size;
data_.reset(new char[size_]);
int32_t* data_ptr = reinterpret_cast<int32_t*>(data_.get());
std::memcpy(data_ptr, shape_values, size_);
} else if (datatype == TRITONSERVER_DataType::TRITONSERVER_TYPE_INT64) {
datatype_size = sizeof(int64_t);
datatype_ = ShapeTensorDataType::INT64;
size_ = element_cnt_ * datatype_size;
data_.reset(new char[size_]);
int64_t* data_ptr = reinterpret_cast<int64_t*>(data_.get());
for (size_t i = 0; i < element_cnt_; ++i) {
data_ptr[i] = static_cast<int64_t>(shape_values[i]);
}
} else {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
"Unsupported data type received for Shape tensor");
}

return nullptr;
}
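
SetDataFromShapeValues() always takes INT32 input, matching TensorRT's getProfileTensorValues() (which yields int32_t values), and widens element-wise when the model's shape tensor is declared INT64. A hypothetical usage sketch, with invented profile values:

// Hypothetical call, mirroring the InitializeShapeInputBinding() changes
// above: TensorRT profile values arrive as int32_t and are widened to
// int64_t when the model config declares TYPE_INT64.
const int32_t profile_values[3] = {1, 16, 16};  // invented example values
ShapeTensor max_shape;
TRITONSERVER_Error* err = max_shape.SetDataFromShapeValues(
    profile_values, TRITONSERVER_TYPE_INT64, 3 /* element_cnt */);
// On success: max_shape.GetSize() == 3 * sizeof(int64_t) and the buffer
// holds {1, 16, 16} stored as int64_t.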

}}} // namespace triton::backend::tensorrt