Commit
Fix shape and reformat free tensor handling in the input byte size check (#97)

* Update flag name
* Improve error message
pskiran1 authored Jul 27, 2024
1 parent 40a2b30 commit 8fe8277
Showing 6 changed files with 146 additions and 19 deletions.
73 changes: 67 additions & 6 deletions src/instance_state.cc
@@ -723,11 +723,12 @@ ModelInstanceState::Run(
TRITONSERVER_DataType datatype;
const int64_t* shape;
uint32_t dims_count;
+ size_t req_data_byte_size;
FAIL_ALL_AND_RETURN_IF_ERROR(
payload_->requests_, payload_->request_count_, payload_->responses_,
TRITONBACKEND_InputProperties(
- repr_input, nullptr, &datatype, &shape, &dims_count, nullptr,
- nullptr),
+ repr_input, nullptr, &datatype, &shape, &dims_count,
+ &req_data_byte_size, nullptr),
(std::string("failed to obtain the representative input "
"properties for '") +
name + "'")
@@ -760,12 +761,30 @@ ModelInstanceState::Run(
size_t total_byte_size = 0;
if (io_binding_info.GetFormat().is_linear_format_) {
total_byte_size = GetByteSize(datatype, batchn_shape);
+ // For input tensors with a linear IO format, the request has already
+ // verified the byte size, so no further validation is needed here.
} else {
batchn_shape[io_binding_info.GetFormat().vectorized_dim_] +=
(io_binding_info.GetFormat().components_per_element_ -
(batchn_shape[io_binding_info.GetFormat().vectorized_dim_] %
io_binding_info.GetFormat().components_per_element_));
total_byte_size = GetByteSize(datatype, batchn_shape);

+ // Ensure the request data byte size matches the expected byte size for
+ // non-linear IO format tensors
+ if (req_data_byte_size != total_byte_size) {
+ FAIL_ALL_AND_RETURN_IF_ERROR(
+ payload_->requests_, payload_->request_count_,
+ payload_->responses_,
+ TRITONSERVER_ErrorNew(
+ TRITONSERVER_ERROR_INVALID_ARG,
+ (std::string("input byte size mismatch for input '") + name +
+ "'" + " for model '" + model_state_->Name() +
+ "'. Expected " + std::to_string(total_byte_size) + ", got " +
+ std::to_string(req_data_byte_size))
+ .c_str()),
+ "failed to run TRT inference");
+ }
}

payload_->collector_->ProcessTensor(
@@ -1760,7 +1779,8 @@ ModelInstanceState::ValidateIO()
{
// Collect all the expected input and allowed output tensor names
// and validate that the model configuration specifies only those.
- std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors;
+ std::set<std::string> allowed_inputs, allowed_outputs, allowed_shape_tensors,
+ allowed_non_linear_format_io;
for (int i = 0; i < total_io_tensors_; ++i) {
const std::string& tensor_name = tensor_names_[i];
if (IsInput(engine_.get(), tensor_name)) {
@@ -1775,6 +1795,15 @@ ModelInstanceState::ValidateIO()
" as shape binding for " + Name())
.c_str());
}
+ auto detected_io_format = engine_->getTensorFormat(tensor_name.c_str());
+ if (detected_io_format != nvinfer1::TensorFormat::kLINEAR) {
+ allowed_non_linear_format_io.emplace(tensor_name);
+ LOG_MESSAGE(
+ TRITONSERVER_LOG_VERBOSE,
+ (std::string("Detected ") + tensor_name + " using IO format " +
+ TensorFormatToString(detected_io_format) + " for " + Name())
+ .c_str());
+ }
}

triton::common::TritonJson::Value config_inputs;
@@ -1808,17 +1837,21 @@ ModelInstanceState::ValidateIO()
}

RETURN_IF_ERROR(ValidateIOHelper(
- config_inputs, allowed_shape_tensors, true /* is_input */));
+ config_inputs, allowed_shape_tensors, allowed_non_linear_format_io,
+ true /* is_input */));
RETURN_IF_ERROR(ValidateIOHelper(
- config_outputs, allowed_shape_tensors, false /* is_input */));
+ config_outputs, allowed_shape_tensors, allowed_non_linear_format_io,
+ false /* is_input */));

return nullptr;
}

TRITONSERVER_Error*
ModelInstanceState::ValidateIOHelper(
common::TritonJson::Value& ios,
- const std::set<std::string>& allowed_shape_tensors, const bool is_input)
+ const std::set<std::string>& allowed_shape_tensors,
+ const std::set<std::string>& allowed_non_linear_format_io,
+ const bool is_input)
{
std::string type = is_input ? "input" : "output";
for (size_t i = 0; i < ios.ArraySize(); i++) {
@@ -1865,6 +1898,34 @@ ModelInstanceState::ValidateIOHelper(
.c_str());
}
}

+ // Check the tensor IO format specification
+ if (allowed_non_linear_format_io.find(io_name) !=
+ allowed_non_linear_format_io.end()) {
+ bool is_non_linear_format_io = false;
+ RETURN_IF_ERROR(
+ io.MemberAsBool("is_non_linear_format_io", &is_non_linear_format_io));
+ if (!is_non_linear_format_io) {
+ return TRITONSERVER_ErrorNew(
+ TRITONSERVER_ERROR_INTERNAL,
+ (type + " '" + io_name + "' for model '" + model_state_->Name() +
+ "' uses a non-linear IO format, but 'is_non_linear_format_io' is "
+ "incorrectly set to false in the model configuration.")
+ .c_str());
+ }
+ } else {
+ bool is_non_linear_format_io = false;
+ RETURN_IF_ERROR(
+ io.MemberAsBool("is_non_linear_format_io", &is_non_linear_format_io));
+ if (is_non_linear_format_io) {
+ return TRITONSERVER_ErrorNew(
+ TRITONSERVER_ERROR_INTERNAL,
+ (type + " '" + io_name + "' for model '" + model_state_->Name() +
+ "' uses a linear IO format, but 'is_non_linear_format_io' is "
+ "incorrectly set to true in the model configuration.")
+ .c_str());
+ }
+ }
}

return nullptr;
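For context on the byte-size check added above: with a non-linear (vectorized) IO format such as CHW4 or HWC8, TensorRT pads the vectorized dimension up to a multiple of the format's components per element, so the expected byte size must be computed from the padded shape rather than the dense request shape. Below is a minimal, standalone sketch of that arithmetic; the struct and values are hypothetical stand-ins for the backend's io_binding_info.GetFormat() fields, not the actual types.

#include <cstdint>
#include <iostream>
#include <vector>

// Hypothetical stand-in for the format fields used in the diff.
struct IOFormat {
  bool is_linear_format;
  int vectorized_dim;              // axis that is vectorized (C in CHW4)
  int64_t components_per_element;  // 4 for CHW4, 8 for HWC8, ...
};

// Dense byte size of a shape: element size times the product of the dims.
int64_t ByteSize(int64_t elem_bytes, const std::vector<int64_t>& shape) {
  int64_t size = elem_bytes;
  for (int64_t d : shape) size *= d;
  return size;
}

int main() {
  // Example: batch 2, FP16 tensor with C=3, H=224, W=224, format CHW4.
  std::vector<int64_t> batchn_shape{2, 3, 224, 224};
  const IOFormat fmt{false, 1, 4};
  const int64_t elem_bytes = 2;  // FP16

  if (!fmt.is_linear_format) {
    // The same padding step as the diff: round the vectorized dimension up
    // to a multiple of components_per_element. (As written, a dimension
    // that is already an exact multiple is bumped by a full extra component.)
    batchn_shape[fmt.vectorized_dim] +=
        fmt.components_per_element -
        (batchn_shape[fmt.vectorized_dim] % fmt.components_per_element);
  }
  // C is padded 3 -> 4, so the expected size is 2*4*224*224*2 = 802816 bytes.
  // A request carrying the dense 2*3*224*224*2 = 602112 bytes would now be
  // rejected with the "input byte size mismatch" error above.
  std::cout << ByteSize(elem_bytes, batchn_shape) << " bytes\n";
  return 0;
}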
4 changes: 3 additions & 1 deletion src/instance_state.h
@@ -295,7 +295,9 @@ class ModelInstanceState : public TensorRTModelInstance {
TRITONSERVER_Error* ValidateIO();
TRITONSERVER_Error* ValidateIOHelper(
common::TritonJson::Value& ios,
- const std::set<std::string>& allowed_shape_tensors, const bool is_input);
+ const std::set<std::string>& allowed_shape_tensors,
+ const std::set<std::string>& allowed_non_linear_format_io,
+ const bool is_input);

TRITONSERVER_Error* InitIOBindingBuffers();
TRITONSERVER_Error* InitializeConfigShapeInputBindings(
47 changes: 37 additions & 10 deletions src/model_state.cc
@@ -754,7 +754,10 @@ ModelState::GetRefIO(
for (int i = 0; i < num_io_tensors; ++i) {
const std::string& tensor_name = engine->getIOTensorName(i);
nvinfer1::Dims dims = engine->getTensorShape(tensor_name.c_str());
- bool is_shape_binding = engine->isShapeInferenceIO(tensor_name.c_str());
+ bool is_shape_tensor = engine->isShapeInferenceIO(tensor_name.c_str());
+ bool is_non_linear_format_io =
+ (engine->getTensorFormat(tensor_name.c_str()) !=
+ nvinfer1::TensorFormat::kLINEAR);
if ((is_input && (!IsInput(engine, tensor_name))) ||
((!is_input) && (IsInput(engine, tensor_name)))) {
continue;
@@ -766,8 +769,10 @@ ModelState::GetRefIO(
RETURN_IF_ERROR(io.AddString(
"data_type", ConvertTrtTypeToConfigDataType(
engine->getTensorDataType(tensor_name.c_str()))));
- RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_binding, &io));
- RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_binding));
+ RETURN_IF_ERROR(InitIODims(engine, dims, is_shape_tensor, &io));
+ RETURN_IF_ERROR(io.AddBool("is_shape_tensor", is_shape_tensor));
+ RETURN_IF_ERROR(
+ io.AddBool("is_non_linear_format_io", is_non_linear_format_io));

RETURN_IF_ERROR(ref_io->Append(std::move(io)));
}
@@ -777,13 +782,13 @@ ModelState::GetRefIO(

TRITONSERVER_Error*
ModelState::InitIODims(
- nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_binding,
+ nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_tensor,
triton::common::TritonJson::Value* io)
{
bool skip_first = (MaxBatchSize() != 0);
triton::common::TritonJson::Value config_dims(
ModelConfig(), triton::common::TritonJson::ValueType::ARRAY);
- if (!is_shape_binding) {
+ if (!is_shape_tensor) {
for (int didx = (skip_first ? 1 : 0); didx < dims.nbDims; ++didx) {
RETURN_IF_ERROR(config_dims.AppendInt(dims.d[didx]));
}
@@ -871,8 +876,7 @@ ModelState::FixIO(
}

// Check if the IO is a shape tensor.
- bool is_shape_tensor = false;
- is_shape_tensor = engine->isShapeInferenceIO(io_name.c_str());
+ bool is_shape_tensor = engine->isShapeInferenceIO(io_name.c_str());

common::TritonJson::Value shape_tensor;
if (mutable_io.Find("is_shape_tensor", &shape_tensor)) {
@@ -885,15 +889,38 @@ ModelState::FixIO(
"' is incorrectly specified as a shape tensor.")
.c_str());
} else if (!shape_tensor_val && is_shape_tensor) {
+ RETURN_IF_ERROR(shape_tensor.SetBool(is_shape_tensor));
+ }
+ } else {
+ RETURN_IF_ERROR(
+ mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
+ }
+
+ // Verify if the IO format is non-linear.
+ bool is_non_linear_format_io =
+ (engine->getTensorFormat(io_name.c_str()) !=
+ nvinfer1::TensorFormat::kLINEAR);
+
+ common::TritonJson::Value non_linear_format_io;
+ if (mutable_io.Find(
+ "is_non_linear_format_io", &non_linear_format_io)) {
+ bool non_linear_format_io_val = false;
+ RETURN_IF_ERROR(
+ non_linear_format_io.AsBool(&non_linear_format_io_val));
+ if (non_linear_format_io_val && (!is_non_linear_format_io)) {
return TRITONSERVER_ErrorNew(
TRITONSERVER_ERROR_INVALID_ARG,
(std::string("'") + io_name +
"' is incorrectly specified as an execution tensor.")
"' uses a linear IO format, but 'is_non_linear_format_io' "
"is incorrectly set to true in the model configuration.")
.c_str());
+ } else if (!non_linear_format_io_val && is_non_linear_format_io) {
+ RETURN_IF_ERROR(
+ non_linear_format_io.SetBool(is_non_linear_format_io));
}
} else {
- RETURN_IF_ERROR(
- mutable_io.AddBool("is_shape_tensor", is_shape_tensor));
+ RETURN_IF_ERROR(mutable_io.AddBool(
+ "is_non_linear_format_io", is_non_linear_format_io));
}
break;
}
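Downstream effect of the FixIO() logic above: when the serialized engine reports a vectorized format for a tensor, the auto-completed model configuration now carries is_non_linear_format_io, and a user-supplied value is validated against the engine. A hypothetical config.pbtxt input entry for a CHW4 FP16 input might then look like the following (the name, dims, and data type are illustrative, not taken from the commit):

input [
  {
    name: "input0"
    data_type: TYPE_FP16
    dims: [ 3, 224, 224 ]
    is_non_linear_format_io: true
  }
]

Setting the flag to true for a tensor the engine reports as linear (or leaving it false for a vectorized one) now fails validation with the errors shown in the diffs above.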
4 changes: 2 additions & 2 deletions src/model_state.h
@@ -109,8 +109,8 @@ class ModelState : public TensorRTModel {
const bool is_input, nvinfer1::ICudaEngine* engine,
triton::common::TritonJson::Value* ref_io);
TRITONSERVER_Error* InitIODims(
- nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims,
- bool is_shape_binding, triton::common::TritonJson::Value* io);
+ nvinfer1::ICudaEngine* engine, nvinfer1::Dims& dims, bool is_shape_tensor,
+ triton::common::TritonJson::Value* io);
TRITONSERVER_Error* FixIO(
nvinfer1::ICudaEngine* engine,
triton::common::TritonJson::Value& reference_ios,
35 changes: 35 additions & 0 deletions src/tensorrt_utils.cc
@@ -491,6 +491,41 @@ DimsJsonToString(common::TritonJson::Value& dims)
return ShapeToString(dims_vec);
}

+ const std::string
+ TensorFormatToString(const nvinfer1::TensorFormat& io_format)
+ {
+ switch (io_format) {
+ case nvinfer1::TensorFormat::kLINEAR:
+ return "LINEAR";
+ case nvinfer1::TensorFormat::kCHW2:
+ return "CHW2";
+ case nvinfer1::TensorFormat::kCHW4:
+ return "CHW4";
+ case nvinfer1::TensorFormat::kCHW16:
+ return "CHW16";
+ case nvinfer1::TensorFormat::kCHW32:
+ return "CHW32";
+ case nvinfer1::TensorFormat::kDHWC:
+ return "DHWC";
+ case nvinfer1::TensorFormat::kDHWC8:
+ return "DHWC8";
+ case nvinfer1::TensorFormat::kHWC:
+ return "HWC";
+ case nvinfer1::TensorFormat::kHWC8:
+ return "HWC8";
+ case nvinfer1::TensorFormat::kHWC16:
+ return "HWC16";
+ case nvinfer1::TensorFormat::kCDHW32:
+ return "CDHW32";
+ case nvinfer1::TensorFormat::kDLA_LINEAR:
+ return "DLA_LINEAR";
+ case nvinfer1::TensorFormat::kDLA_HWC4:
+ return "DLA_HWC4";
+ default:
+ return "UNKNOWN";
+ }
+ }

TRITONSERVER_Error*
SupportsIntegratedZeroCopy(const int gpu_id, bool* zero_copy_support)
{
2 changes: 2 additions & 0 deletions src/tensorrt_utils.h
@@ -108,6 +108,8 @@ const std::string DimsDebugString(const nvinfer1::Dims& dims);

const std::string DimsJsonToString(common::TritonJson::Value& dims);

+ const std::string TensorFormatToString(const nvinfer1::TensorFormat& io_format);

TRITONSERVER_Error* SupportsIntegratedZeroCopy(
const int gpu_id, bool* zero_copy_support);

