Add timeout to grpc client APIs #429

Merged (12 commits) on Nov 16, 2023
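This change adds an optional trailing timeout_ms argument to the non-inference gRPC client APIs (liveness/readiness, metadata, config, repository, trace, and shared-memory calls) and routes the existing inference timeouts through a shared SetTimeout helper; a value of 0 keeps the previous no-timeout behavior. A minimal caller sketch follows, assuming the Triton C++ client library; the endpoint and timeout values are illustrative, not taken from this diff, and note that the helper interprets the value in microseconds:

#include <memory>
#include "grpc_client.h"

namespace tc = triton::client;

int
main()
{
  std::unique_ptr<tc::InferenceServerGrpcClient> client;
  // "localhost:8001" is a placeholder endpoint, not part of this PR.
  tc::Error err =
      tc::InferenceServerGrpcClient::Create(&client, "localhost:8001");
  if (!err.IsOk()) {
    return 1;
  }

  bool live = false;
  // New trailing argument added by this PR. SetTimeout() below applies it
  // with std::chrono::microseconds, so 500000 is a 0.5 s deadline.
  err = client->IsServerLive(&live, tc::Headers(), 500000);
  if (!err.IsOk()) {
    // On expiry the call returns the gRPC DEADLINE_EXCEEDED status
    // surfaced through tc::Error instead of blocking indefinitely.
  }
  return 0;
}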
91 changes: 64 additions & 27 deletions src/c++/library/grpc_client.cc
@@ -145,6 +145,20 @@ GetStub(

return stub;
}

/// Set a deadline on the client context
///
/// \param client_timeout_ms Timeout after which the request is aborted.
/// A value of 0 means no timeout. Note: despite the _ms suffix, the value
/// is interpreted as microseconds (std::chrono::microseconds below),
/// matching the unit of the pre-existing client_timeout_ option.
/// \param context Client context on which to set the deadline
void
SetTimeout(const uint64_t client_timeout_ms, grpc::ClientContext* context)
{
if (client_timeout_ms != 0) {
auto deadline = std::chrono::system_clock::now() +
std::chrono::microseconds(client_timeout_ms);
context->set_deadline(deadline);
}
}
} // namespace
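The helper consolidates the deadline logic previously inlined in Infer, AsyncInfer, and StartStream (see the final three hunks of this diff). For reference, here is the same pattern written against raw gRPC; this is a sketch with placeholder names, not code from this file:

#include <chrono>
#include <cstdint>
#include <grpcpp/client_context.h>

// Same shape as the SetTimeout helper above: zero disables the deadline;
// otherwise the RPC fails with grpc::StatusCode::DEADLINE_EXCEEDED once
// the wall-clock deadline passes.
void
ApplyDeadline(const uint64_t timeout_us, grpc::ClientContext* context)
{
  if (timeout_us != 0) {
    context->set_deadline(
        std::chrono::system_clock::now() +
        std::chrono::microseconds(timeout_us));
  }
}

Because the deadline is absolute, it must be computed immediately before the call; a context whose deadline has already elapsed fails without ever reaching the network.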

//==============================================================================
@@ -479,14 +493,16 @@ InferenceServerGrpcClient::Create(
}

Error
InferenceServerGrpcClient::IsServerLive(bool* live, const Headers& headers)
InferenceServerGrpcClient::IsServerLive(
bool* live, const Headers& headers, const uint64_t timeout_ms)
{
Error err;

inference::ServerLiveRequest request;
inference::ServerLiveResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -505,14 +521,16 @@ InferenceServerGrpcClient::IsServerLive(bool* live, const Headers& headers)
}

Error
InferenceServerGrpcClient::IsServerReady(bool* ready, const Headers& headers)
InferenceServerGrpcClient::IsServerReady(
bool* ready, const Headers& headers, const uint64_t timeout_ms)
{
Error err;

inference::ServerReadyRequest request;
inference::ServerReadyResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -533,14 +551,16 @@ InferenceServerGrpcClient::IsServerReady(bool* ready, const Headers& headers)
Error
InferenceServerGrpcClient::IsModelReady(
bool* ready, const std::string& model_name,
const std::string& model_version, const Headers& headers)
const std::string& model_version, const Headers& headers,
const uint64_t timeout_ms)
{
Error err;

inference::ModelReadyRequest request;
inference::ModelReadyResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -567,14 +587,16 @@ InferenceServerGrpcClient::IsModelReady(

Error
InferenceServerGrpcClient::ServerMetadata(
inference::ServerMetadataResponse* server_metadata, const Headers& headers)
inference::ServerMetadataResponse* server_metadata, const Headers& headers,
const uint64_t timeout_ms)
{
server_metadata->Clear();
Error err;

inference::ServerMetadataRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -597,14 +619,15 @@ Error
InferenceServerGrpcClient::ModelMetadata(
inference::ModelMetadataResponse* model_metadata,
const std::string& model_name, const std::string& model_version,
const Headers& headers)
const Headers& headers, const uint64_t timeout_ms)
{
model_metadata->Clear();
Error err;

inference::ModelMetadataRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -628,14 +651,16 @@ InferenceServerGrpcClient::ModelMetadata(
Error
InferenceServerGrpcClient::ModelConfig(
inference::ModelConfigResponse* model_config, const std::string& model_name,
const std::string& model_version, const Headers& headers)
const std::string& model_version, const Headers& headers,
const uint64_t timeout_ms)
{
model_config->Clear();
Error err;

inference::ModelConfigRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -658,14 +683,15 @@ InferenceServerGrpcClient::ModelConfig(
Error
InferenceServerGrpcClient::ModelRepositoryIndex(
inference::RepositoryIndexResponse* repository_index,
const Headers& headers)
const Headers& headers, const uint64_t timeout_ms)
{
repository_index->Clear();
Error err;

inference::RepositoryIndexRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -687,14 +713,16 @@ Error
InferenceServerGrpcClient::LoadModel(
const std::string& model_name, const Headers& headers,
const std::string& config,
const std::map<std::string, std::vector<char>>& files)
const std::map<std::string, std::vector<char>>& files,
const uint64_t timeout_ms)
{
Error err;

inference::RepositoryModelLoadRequest request;
inference::RepositoryModelLoadResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -722,14 +750,16 @@ InferenceServerGrpcClient::LoadModel(

Error
InferenceServerGrpcClient::UnloadModel(
const std::string& model_name, const Headers& headers)
const std::string& model_name, const Headers& headers,
const uint64_t timeout_ms)
{
Error err;

inference::RepositoryModelUnloadRequest request;
inference::RepositoryModelUnloadResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -752,14 +782,15 @@ Error
InferenceServerGrpcClient::ModelInferenceStatistics(
inference::ModelStatisticsResponse* infer_stat,
const std::string& model_name, const std::string& model_version,
const Headers& headers)
const Headers& headers, const uint64_t timeout_ms)
{
infer_stat->Clear();
Error err;

inference::ModelStatisticsRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -783,12 +814,13 @@ Error
InferenceServerGrpcClient::UpdateTraceSettings(
inference::TraceSettingResponse* response, const std::string& model_name,
const std::map<std::string, std::vector<std::string>>& settings,
const Headers& headers)
const Headers& headers, const uint64_t timeout_ms)
{
inference::TraceSettingRequest request;
grpc::ClientContext context;
Error err;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -823,14 +855,15 @@ InferenceServerGrpcClient::UpdateTraceSettings(
Error
InferenceServerGrpcClient::GetTraceSettings(
inference::TraceSettingResponse* settings, const std::string& model_name,
const Headers& headers)
const Headers& headers, const uint64_t timeout_ms)
{
settings->Clear();
Error err;

inference::TraceSettingRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -853,14 +886,16 @@ InferenceServerGrpcClient::GetTraceSettings(
Error
InferenceServerGrpcClient::SystemSharedMemoryStatus(
inference::SystemSharedMemoryStatusResponse* status,
const std::string& region_name, const Headers& headers)
const std::string& region_name, const Headers& headers,
const uint64_t timeout_ms)
{
status->Clear();
Error err;

inference::SystemSharedMemoryStatusRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -882,14 +917,15 @@ InferenceServerGrpcClient::SystemSharedMemoryStatus(
Error
InferenceServerGrpcClient::RegisterSystemSharedMemory(
const std::string& name, const std::string& key, const size_t byte_size,
const size_t offset, const Headers& headers)
const size_t offset, const Headers& headers, const uint64_t timeout_ms)
{
Error err;

inference::SystemSharedMemoryRegisterRequest request;
inference::SystemSharedMemoryRegisterResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -914,14 +950,15 @@ InferenceServerGrpcClient::RegisterSystemSharedMemory(

Error
InferenceServerGrpcClient::UnregisterSystemSharedMemory(
const std::string& name, const Headers& headers)
const std::string& name, const Headers& headers, const uint64_t timeout_ms)
{
Error err;

inference::SystemSharedMemoryUnregisterRequest request;
inference::SystemSharedMemoryUnregisterResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -949,14 +986,16 @@ InferenceServerGrpcClient::UnregisterSystemSharedMemory(
Error
InferenceServerGrpcClient::CudaSharedMemoryStatus(
inference::CudaSharedMemoryStatusResponse* status,
const std::string& region_name, const Headers& headers)
const std::string& region_name, const Headers& headers,
const uint64_t timeout_ms)
{
status->Clear();
Error err;

inference::CudaSharedMemoryStatusRequest request;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -978,14 +1017,16 @@ InferenceServerGrpcClient::CudaSharedMemoryStatus(
Error
InferenceServerGrpcClient::RegisterCudaSharedMemory(
const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle,
const size_t device_id, const size_t byte_size, const Headers& headers)
const size_t device_id, const size_t byte_size, const Headers& headers,
const uint64_t timeout_ms)
{
Error err;

inference::CudaSharedMemoryRegisterRequest request;
inference::CudaSharedMemoryRegisterResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -1010,14 +1051,16 @@ InferenceServerGrpcClient::RegisterCudaSharedMemory(

Error
InferenceServerGrpcClient::UnregisterCudaSharedMemory(
const std::string& name, const Headers& headers)
const std::string& name, const Headers& headers, const uint64_t timeout_ms)
{
Error err;

inference::CudaSharedMemoryUnregisterRequest request;
inference::CudaSharedMemoryUnregisterResponse response;
grpc::ClientContext context;

SetTimeout(timeout_ms, &context);
for (const auto& it : headers) {
context.AddMetadata(it.first, it.second);
}
@@ -1064,9 +1107,7 @@ InferenceServerGrpcClient::Infer(
}

if (options.client_timeout_ != 0) {
auto deadline = std::chrono::system_clock::now() +
std::chrono::microseconds(options.client_timeout_);
context.set_deadline(deadline);
SetTimeout(options.client_timeout_, &context);
}
context.set_compression_algorithm(compression_algorithm);

@@ -1128,9 +1169,7 @@ InferenceServerGrpcClient::AsyncInfer(
}

if (options.client_timeout_ != 0) {
auto deadline = std::chrono::system_clock::now() +
std::chrono::microseconds(options.client_timeout_);
async_request->grpc_context_.set_deadline(deadline);
SetTimeout(options.client_timeout_, &(async_request->grpc_context_));
}
async_request->grpc_context_.set_compression_algorithm(compression_algorithm);

@@ -1300,9 +1339,7 @@ InferenceServerGrpcClient::StartStream(
}

if (stream_timeout != 0) {
auto deadline = std::chrono::system_clock::now() +
std::chrono::microseconds(stream_timeout);
grpc_context_.set_deadline(deadline);
SetTimeout(stream_timeout, &grpc_context_);
}
grpc_context_.set_compression_algorithm(compression_algorithm);

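A note on the StartStream hunk above: the deadline is set once on grpc_context_, the context that backs the whole bidirectional stream, so stream_timeout (also in microseconds here) bounds the lifetime of the entire stream rather than each individual request on it. This is standard gRPC semantics, not something this diff changes. A usage sketch, reusing the client from the earlier example; the callback body and parameter order follow my reading of the client header and should be treated as illustrative:

// 10000000 us = 10 s for the whole stream; outstanding and future
// requests on the stream fail once the deadline expires.
err = client->StartStream(
    [](tc::InferResult* result) {
      // Completion callback: take ownership of each streamed result.
      delete result;
    },
    true /* enable_stats */, 10000000 /* stream_timeout, microseconds */);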