From c3e6af3f858f83bcd39881a05e71f63d2355bf09 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Mon, 14 Aug 2023 16:12:29 -0700 Subject: [PATCH 01/10] add timeout to all apis in grpc for C++ client --- src/c++/library/grpc_client.cc | 100 +++++++++++++++++++++++++++------ src/c++/library/grpc_client.h | 68 +++++++++++++++------- 2 files changed, 131 insertions(+), 37 deletions(-) diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc index cc3a9a85f..c8466f43f 100644 --- a/src/c++/library/grpc_client.cc +++ b/src/c++/library/grpc_client.cc @@ -479,7 +479,8 @@ InferenceServerGrpcClient::Create( } Error -InferenceServerGrpcClient::IsServerLive(bool* live, const Headers& headers) +InferenceServerGrpcClient::IsServerLive( + bool* live, const Headers& headers, const int timeout_ms) { Error err; @@ -487,6 +488,9 @@ InferenceServerGrpcClient::IsServerLive(bool* live, const Headers& headers) inference::ServerLiveResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -505,7 +509,8 @@ InferenceServerGrpcClient::IsServerLive(bool* live, const Headers& headers) } Error -InferenceServerGrpcClient::IsServerReady(bool* ready, const Headers& headers) +InferenceServerGrpcClient::IsServerReady( + bool* ready, const Headers& headers, const int timeout_ms) { Error err; @@ -513,6 +518,9 @@ InferenceServerGrpcClient::IsServerReady(bool* ready, const Headers& headers) inference::ServerReadyResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -533,7 +541,8 @@ InferenceServerGrpcClient::IsServerReady(bool* ready, const Headers& headers) Error InferenceServerGrpcClient::IsModelReady( bool* ready, const std::string& model_name, - const std::string& model_version, const Headers& headers) + const std::string& model_version, const Headers& headers, + const int timeout_ms = INT_MAX) { Error err; @@ -541,6 +550,9 @@ InferenceServerGrpcClient::IsModelReady( inference::ModelReadyResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -567,7 +579,8 @@ InferenceServerGrpcClient::IsModelReady( Error InferenceServerGrpcClient::ServerMetadata( - inference::ServerMetadataResponse* server_metadata, const Headers& headers) + inference::ServerMetadataResponse* server_metadata, const Headers& headers, + const int timeout_ms) { server_metadata->Clear(); Error err; @@ -575,6 +588,9 @@ InferenceServerGrpcClient::ServerMetadata( inference::ServerMetadataRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -597,7 +613,7 @@ Error InferenceServerGrpcClient::ModelMetadata( inference::ModelMetadataResponse* model_metadata, const std::string& model_name, const std::string& model_version, - const Headers& headers) + const Headers& headers, const int timeout_ms) { model_metadata->Clear(); Error err; @@ -605,6 +621,9 @@ 
InferenceServerGrpcClient::ModelMetadata( inference::ModelMetadataRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -628,7 +647,8 @@ InferenceServerGrpcClient::ModelMetadata( Error InferenceServerGrpcClient::ModelConfig( inference::ModelConfigResponse* model_config, const std::string& model_name, - const std::string& model_version, const Headers& headers) + const std::string& model_version, const Headers& headers, + const int timeout_ms) { model_config->Clear(); Error err; @@ -636,6 +656,9 @@ InferenceServerGrpcClient::ModelConfig( inference::ModelConfigRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -658,7 +681,7 @@ InferenceServerGrpcClient::ModelConfig( Error InferenceServerGrpcClient::ModelRepositoryIndex( inference::RepositoryIndexResponse* repository_index, - const Headers& headers) + const Headers& headers, const int timeout_ms) { repository_index->Clear(); Error err; @@ -666,6 +689,9 @@ InferenceServerGrpcClient::ModelRepositoryIndex( inference::RepositoryIndexRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -687,7 +713,7 @@ Error InferenceServerGrpcClient::LoadModel( const std::string& model_name, const Headers& headers, const std::string& config, - const std::map>& files) + const std::map>& files, const int timeout_ms) { Error err; @@ -695,6 +721,9 @@ InferenceServerGrpcClient::LoadModel( inference::RepositoryModelLoadResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -722,7 +751,7 @@ InferenceServerGrpcClient::LoadModel( Error InferenceServerGrpcClient::UnloadModel( - const std::string& model_name, const Headers& headers) + const std::string& model_name, const Headers& headers, const int timeout_ms) { Error err; @@ -730,6 +759,9 @@ InferenceServerGrpcClient::UnloadModel( inference::RepositoryModelUnloadResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -752,7 +784,7 @@ Error InferenceServerGrpcClient::ModelInferenceStatistics( inference::ModelStatisticsResponse* infer_stat, const std::string& model_name, const std::string& model_version, - const Headers& headers) + const Headers& headers, const int timeout_ms) { infer_stat->Clear(); Error err; @@ -760,6 +792,9 @@ InferenceServerGrpcClient::ModelInferenceStatistics( inference::ModelStatisticsRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -783,12 +818,15 @@ Error InferenceServerGrpcClient::UpdateTraceSettings( 
inference::TraceSettingResponse* response, const std::string& model_name, const std::map>& settings, - const Headers& headers) + const Headers& headers, const int timeout_ms) { inference::TraceSettingRequest request; grpc::ClientContext context; Error err; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -823,7 +861,7 @@ InferenceServerGrpcClient::UpdateTraceSettings( Error InferenceServerGrpcClient::GetTraceSettings( inference::TraceSettingResponse* settings, const std::string& model_name, - const Headers& headers) + const Headers& headers, const int timeout_ms) { settings->Clear(); Error err; @@ -831,6 +869,9 @@ InferenceServerGrpcClient::GetTraceSettings( inference::TraceSettingRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -853,7 +894,8 @@ InferenceServerGrpcClient::GetTraceSettings( Error InferenceServerGrpcClient::SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusResponse* status, - const std::string& region_name, const Headers& headers) + const std::string& region_name, const Headers& headers, + const int timeout_ms) { status->Clear(); Error err; @@ -861,6 +903,9 @@ InferenceServerGrpcClient::SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -882,7 +927,7 @@ InferenceServerGrpcClient::SystemSharedMemoryStatus( Error InferenceServerGrpcClient::RegisterSystemSharedMemory( const std::string& name, const std::string& key, const size_t byte_size, - const size_t offset, const Headers& headers) + const size_t offset, const Headers& headers, const int timeout_ms) { Error err; @@ -890,6 +935,9 @@ InferenceServerGrpcClient::RegisterSystemSharedMemory( inference::SystemSharedMemoryRegisterResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -914,7 +962,7 @@ InferenceServerGrpcClient::RegisterSystemSharedMemory( Error InferenceServerGrpcClient::UnregisterSystemSharedMemory( - const std::string& name, const Headers& headers) + const std::string& name, const Headers& headers, const int timeout_ms) { Error err; @@ -922,6 +970,9 @@ InferenceServerGrpcClient::UnregisterSystemSharedMemory( inference::SystemSharedMemoryUnregisterResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -949,7 +1000,8 @@ InferenceServerGrpcClient::UnregisterSystemSharedMemory( Error InferenceServerGrpcClient::CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusResponse* status, - const std::string& region_name, const Headers& headers) + const std::string& region_name, const Headers& headers, + const int timeout_ms) { status->Clear(); Error err; @@ -957,6 +1009,9 @@ 
InferenceServerGrpcClient::CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusRequest request; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -978,7 +1033,8 @@ InferenceServerGrpcClient::CudaSharedMemoryStatus( Error InferenceServerGrpcClient::RegisterCudaSharedMemory( const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle, - const size_t device_id, const size_t byte_size, const Headers& headers) + const size_t device_id, const size_t byte_size, const Headers& headers, + const int timeout_ms) { Error err; @@ -986,6 +1042,9 @@ InferenceServerGrpcClient::RegisterCudaSharedMemory( inference::CudaSharedMemoryRegisterResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -1010,7 +1069,8 @@ InferenceServerGrpcClient::RegisterCudaSharedMemory( Error InferenceServerGrpcClient::UnregisterCudaSharedMemory( - const std::string& name, const Headers& headers) + const std::string& name, const Headers& headers, const int timeout_ms, + const int timeout_ms) { Error err; @@ -1018,6 +1078,10 @@ InferenceServerGrpcClient::UnregisterCudaSharedMemory( inference::CudaSharedMemoryUnregisterResponse response; grpc::ClientContext context; + auto deadline = + std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); + context.set_deadline(deadline); + for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } diff --git a/src/c++/library/grpc_client.h b/src/c++/library/grpc_client.h index 199ebed40..56f1dfa56 100644 --- a/src/c++/library/grpc_client.h +++ b/src/c++/library/grpc_client.h @@ -156,15 +156,21 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param live Returns whether the server is live or not. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. - Error IsServerLive(bool* live, const Headers& headers = Headers()); + Error IsServerLive( + bool* live, const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Contact the inference server and get its readiness. /// \param ready Returns whether the server is ready or not. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. - Error IsServerReady(bool* ready, const Headers& headers = Headers()); + Error IsServerReady( + bool* ready, const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Contact the inference server and get the readiness of specified model. /// \param ready Returns whether the specified model is ready or not. @@ -174,21 +180,23 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. 
+ /// \param timeout_ms Optional timeout for API call /// \return Error object indicating success or failure of the request. Error IsModelReady( bool* ready, const std::string& model_name, - const std::string& model_version = "", - const Headers& headers = Headers()); + const std::string& model_version = "", const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Contact the inference server and get its metadata. /// \param server_metadata Returns the server metadata as /// SeverMetadataResponse message. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error ServerMetadata( inference::ServerMetadataResponse* server_metadata, - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Contact the inference server and get the metadata of specified model. /// \param model_metadata Returns model metadata as ModelMetadataResponse @@ -199,11 +207,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error ModelMetadata( inference::ModelMetadataResponse* model_metadata, const std::string& model_name, const std::string& model_version = "", - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Contact the inference server and get the configuration of specified model. /// \param model_config Returns model config as ModelConfigResponse @@ -214,11 +223,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error ModelConfig( inference::ModelConfigResponse* model_config, const std::string& model_name, const std::string& model_version = "", - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Contact the inference server and get the index of model repository /// contents. @@ -226,10 +236,11 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// RepositoryIndexRequestResponse /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error ModelRepositoryIndex( inference::RepositoryIndexResponse* repository_index, - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Request the inference server to load or reload specified model. /// \param model_name The name of the model to be loaded or reloaded. @@ -243,19 +254,23 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// The files will form the model directory that the model /// will be loaded from. 
If specified, 'config' must be provided to be /// the model configuration of the override model directory. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error LoadModel( const std::string& model_name, const Headers& headers = Headers(), const std::string& config = std::string(), - const std::map>& files = {}); + const std::map>& files = {}, + const int timeout_ms = INT_MAX); /// Request the inference server to unload specified model. /// \param model_name The name of the model to be unloaded. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error UnloadModel( - const std::string& model_name, const Headers& headers = Headers()); + const std::string& model_name, const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Contact the inference server and get the inference statistics for the /// specified model name and version. @@ -269,11 +284,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error ModelInferenceStatistics( inference::ModelStatisticsResponse* infer_stat, const std::string& model_name = "", const std::string& model_version = "", - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Update the trace settings for the specified model name, or global trace /// settings if model name is not given. @@ -289,13 +305,14 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// loading the model. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error UpdateTraceSettings( inference::TraceSettingResponse* response, const std::string& model_name = "", const std::map>& settings = std::map>(), - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Get the trace settings for the specified model name, or global trace /// settings if model name is not given. @@ -305,10 +322,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// will be returned. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error GetTraceSettings( inference::TraceSettingResponse* settings, - const std::string& model_name = "", const Headers& headers = Headers()); + const std::string& model_name = "", const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Contact the inference server and get the status for requested system /// shared memory. @@ -319,10 +338,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// shared memory will be returned. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. 
+ /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusResponse* status, - const std::string& region_name = "", const Headers& headers = Headers()); + const std::string& region_name = "", const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Request the server to register a system shared memory with the provided /// details. @@ -334,10 +355,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// the start of the system shared memory region. The default value is zero. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request Error RegisterSystemSharedMemory( const std::string& name, const std::string& key, const size_t byte_size, - const size_t offset = 0, const Headers& headers = Headers()); + const size_t offset = 0, const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Request the server to unregister a system shared memory with the /// specified name. @@ -346,9 +369,11 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// unregistered. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request Error UnregisterSystemSharedMemory( - const std::string& name = "", const Headers& headers = Headers()); + const std::string& name = "", const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Contact the inference server and get the status for requested CUDA /// shared memory. @@ -359,10 +384,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// shared memory will be returned. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request. Error CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusResponse* status, - const std::string& region_name = "", const Headers& headers = Headers()); + const std::string& region_name = "", const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Request the server to register a CUDA shared memory with the provided /// details. @@ -374,11 +401,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// bytes. /// \param headers Optional map specifying additional HTTP headers to /// include in the metadata of gRPC request. + /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request Error RegisterCudaSharedMemory( const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle, const size_t device_id, const size_t byte_size, - const Headers& headers = Headers()); + const Headers& headers = Headers(), const int timeout_ms = INT_MAX); /// Request the server to unregister a CUDA shared memory with the /// specified name. @@ -387,9 +415,11 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// unregistered. /// \param headers Optional map specifying additional HTTP headers to /// include in the metadata of gRPC request. 
+ /// \param timeout_ms Optional timeout for API call. /// \return Error object indicating success or failure of the request Error UnregisterCudaSharedMemory( - const std::string& name = "", const Headers& headers = Headers()); + const std::string& name = "", const Headers& headers = Headers(), + const int timeout_ms = INT_MAX); /// Run synchronous inference on server. /// \param result Returns the result of inference. From 93d7d35a73aad5abadf7e0012e480ffd4117b0de Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Mon, 14 Aug 2023 17:32:35 -0700 Subject: [PATCH 02/10] add timeout to python grpc client --- src/c++/library/grpc_client.cc | 5 +- .../library/tritonclient/grpc/_client.py | 190 ++++++++++++++---- .../library/tritonclient/grpc/aio/__init__.py | 135 +++++++++---- 3 files changed, 247 insertions(+), 83 deletions(-) diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc index c8466f43f..151e9a339 100644 --- a/src/c++/library/grpc_client.cc +++ b/src/c++/library/grpc_client.cc @@ -542,7 +542,7 @@ Error InferenceServerGrpcClient::IsModelReady( bool* ready, const std::string& model_name, const std::string& model_version, const Headers& headers, - const int timeout_ms = INT_MAX) + const int timeout_ms) { Error err; @@ -1069,8 +1069,7 @@ InferenceServerGrpcClient::RegisterCudaSharedMemory( Error InferenceServerGrpcClient::UnregisterCudaSharedMemory( - const std::string& name, const Headers& headers, const int timeout_ms, - const int timeout_ms) + const std::string& name, const Headers& headers, const int timeout_ms) { Error err; diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index c4f56521f..15263b2ea 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -264,7 +264,7 @@ def close(self): self.stop_stream() self._channel.close() - def is_server_live(self, headers=None): + def is_server_live(self, headers=None, client_timeout=sys.maxint): """Contact the inference server and get liveness. Parameters @@ -272,6 +272,8 @@ def is_server_live(self, headers=None): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. + client_timeout: int + Optional timeout for the request. Returns ------- @@ -289,14 +291,16 @@ def is_server_live(self, headers=None): request = service_pb2.ServerLiveRequest() if self._verbose: print("is_server_live, metadata {}\n{}".format(metadata, request)) - response = self._client_stub.ServerLive(request=request, metadata=metadata) + response = self._client_stub.ServerLive( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print(response) return response.live except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def is_server_ready(self, headers=None): + def is_server_ready(self, headers=None, timeout=sys.intmax): """Contact the inference server and get readiness. Parameters @@ -304,6 +308,8 @@ def is_server_ready(self, headers=None): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. + client_timeout: int + Optional timeout for the request. 
Returns ------- @@ -321,14 +327,18 @@ def is_server_ready(self, headers=None): request = service_pb2.ServerReadyRequest() if self._verbose: print("is_server_ready, metadata {}\n{}".format(metadata, request)) - response = self._client_stub.ServerReady(request=request, metadata=metadata) + response = self._client_stub.ServerReady( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print(response) return response.ready except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def is_model_ready(self, model_name, model_version="", headers=None): + def is_model_ready( + self, model_name, model_version="", headers=None, client_timeout=sys.maxint + ): """Contact the inference server and get the readiness of specified model. Parameters @@ -342,6 +352,8 @@ def is_model_ready(self, model_name, model_version="", headers=None): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. + client_timeout: int + Optional timeout for the request Returns ------- @@ -363,14 +375,18 @@ def is_model_ready(self, model_name, model_version="", headers=None): ) if self._verbose: print("is_model_ready, metadata {}\n{}".format(metadata, request)) - response = self._client_stub.ModelReady(request=request, metadata=metadata) + response = self._client_stub.ModelReady( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print(response) return response.ready except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_server_metadata(self, headers=None, as_json=False): + def get_server_metadata( + self, headers=None, as_json=False, client_timeout=sys.maxint + ): """Contact the inference server and get its metadata. Parameters @@ -386,6 +402,9 @@ def get_server_metadata(self, headers=None, as_json=False): are represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request + Returns ------- @@ -405,7 +424,7 @@ def get_server_metadata(self, headers=None, as_json=False): if self._verbose: print("get_server_metadata, metadata {}\n{}".format(metadata, request)) response = self._client_stub.ServerMetadata( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -419,7 +438,12 @@ def get_server_metadata(self, headers=None, as_json=False): raise_error_grpc(rpc_error) def get_model_metadata( - self, model_name, model_version="", headers=None, as_json=False + self, + model_name, + model_version="", + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Contact the inference server and get the metadata for specified model. @@ -442,6 +466,8 @@ def get_model_metadata( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. 
+ client_timeout: int + Optional timeout for the request Returns ------- @@ -465,7 +491,7 @@ def get_model_metadata( if self._verbose: print("get_model_metadata, metadata {}\n{}".format(metadata, request)) response = self._client_stub.ModelMetadata( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -479,7 +505,12 @@ def get_model_metadata( raise_error_grpc(rpc_error) def get_model_config( - self, model_name, model_version="", headers=None, as_json=False + self, + model_name, + model_version="", + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Contact the inference server and get the configuration for specified model. @@ -502,6 +533,8 @@ def get_model_config( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Returns ------- @@ -524,7 +557,9 @@ def get_model_config( ) if self._verbose: print("get_model_config, metadata {}\n{}".format(metadata, request)) - response = self._client_stub.ModelConfig(request=request, metadata=metadata) + response = self._client_stub.ModelConfig( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print(response) if as_json: @@ -536,7 +571,9 @@ def get_model_config( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_model_repository_index(self, headers=None, as_json=False): + def get_model_repository_index( + self, headers=None, as_json=False, client_timeout=sys.maxint + ): """Get the index of model repository contents Parameters @@ -553,6 +590,8 @@ def get_model_repository_index(self, headers=None, as_json=False): represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Returns ------- @@ -571,7 +610,7 @@ def get_model_repository_index(self, headers=None, as_json=False): ) ) response = self._client_stub.RepositoryIndex( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -584,7 +623,14 @@ def get_model_repository_index(self, headers=None, as_json=False): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def load_model(self, model_name, headers=None, config=None, files=None): + def load_model( + self, + model_name, + headers=None, + config=None, + files=None, + client_timeout=sys.maxint, + ): """Request the inference server to load or reload specified model. Parameters @@ -604,6 +650,8 @@ def load_model(self, model_name, headers=None, config=None, files=None): The files will form the model directory that the model will be loaded from. If specified, 'config' must be provided to be the model configuration of the override model directory. 
+ client_timeout: int + Optional timeout for the request Raises ------ @@ -626,13 +674,21 @@ def load_model(self, model_name, headers=None, config=None, files=None): if files is not None: for path, content in files.items(): request.parameters[path].bytes_param = content - self._client_stub.RepositoryModelLoad(request=request, metadata=metadata) + self._client_stub.RepositoryModelLoad( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print("Loaded model '{}'".format(model_name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def unload_model(self, model_name, headers=None, unload_dependents=False): + def unload_model( + self, + model_name, + headers=None, + unload_dependents=False, + client_timeout=sys.maxint, + ): """Request the inference server to unload specified model. Parameters @@ -644,6 +700,8 @@ def unload_model(self, model_name, headers=None, unload_dependents=False): headers to include in the request. unload_dependents : bool Whether the dependents of the model should also be unloaded. + client_timeout: int + Optional timeout for the request Raises ------ @@ -657,14 +715,21 @@ def unload_model(self, model_name, headers=None, unload_dependents=False): request.parameters["unload_dependents"].bool_param = unload_dependents if self._verbose: print("unload_model, metadata {}\n{}".format(metadata, request)) - self._client_stub.RepositoryModelUnload(request=request, metadata=metadata) + self._client_stub.RepositoryModelUnload( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print("Unloaded model '{}'".format(model_name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) def get_inference_statistics( - self, model_name="", model_version="", headers=None, as_json=False + self, + model_name="", + model_version="", + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Get the inference statistics for the specified model name and version. @@ -691,6 +756,8 @@ def get_inference_statistics( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Raises ------ @@ -712,7 +779,7 @@ def get_inference_statistics( ) ) response = self._client_stub.ModelStatistics( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -726,7 +793,12 @@ def get_inference_statistics( raise_error_grpc(rpc_error) def update_trace_settings( - self, model_name=None, settings={}, headers=None, as_json=False + self, + model_name=None, + settings={}, + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Update the trace settings for the specified model name, or global trace settings if model name is not given. @@ -754,6 +826,8 @@ def update_trace_settings( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. 
+ client_timeout: int + Optional timeout for the request Returns ------- @@ -785,7 +859,7 @@ def update_trace_settings( "update_trace_settings, metadata {}\n{}".format(metadata, request) ) response = self._client_stub.TraceSetting( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -798,7 +872,9 @@ def update_trace_settings( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_trace_settings(self, model_name=None, headers=None, as_json=False): + def get_trace_settings( + self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxint + ): """Get the trace settings for the specified model name, or global trace settings if model name is not given @@ -820,6 +896,8 @@ def get_trace_settings(self, model_name=None, headers=None, as_json=False): represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Returns ------- @@ -841,7 +919,7 @@ def get_trace_settings(self, model_name=None, headers=None, as_json=False): if self._verbose: print("get_trace_settings, metadata {}\n{}".format(metadata, request)) response = self._client_stub.TraceSetting( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -854,7 +932,9 @@ def get_trace_settings(self, model_name=None, headers=None, as_json=False): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def update_log_settings(self, settings, headers=None, as_json=False): + def update_log_settings( + self, settings, headers=None, as_json=False, client_timeout=sys.maxint + ): """Update the global log settings. Returns the log settings after the update. Parameters @@ -874,6 +954,8 @@ def update_log_settings(self, settings, headers=None, as_json=False): represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Returns ------- dict or protobuf message @@ -900,7 +982,9 @@ def update_log_settings(self, settings, headers=None, as_json=False): if self._verbose: print("update_log_settings, metadata {}\n{}".format(metadata, request)) - response = self._client_stub.LogSettings(request=request, metadata=metadata) + response = self._client_stub.LogSettings( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print(response) if as_json: @@ -912,7 +996,7 @@ def update_log_settings(self, settings, headers=None, as_json=False): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_log_settings(self, headers=None, as_json=False): + def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxint): """Get the global log settings. Parameters ---------- @@ -928,6 +1012,8 @@ def get_log_settings(self, headers=None, as_json=False): represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. 
+ client_timeout: int + Optional timeout for the request Returns ------- dict or protobuf message @@ -943,7 +1029,9 @@ def get_log_settings(self, headers=None, as_json=False): request = service_pb2.LogSettingsRequest() if self._verbose: print("get_log_settings, metadata {}\n{}".format(metadata, request)) - response = self._client_stub.LogSettings(request=request, metadata=metadata) + response = self._client_stub.LogSettings( + request=request, metadata=metadata, timeout=client_timeout + ) if self._verbose: print(response) if as_json: @@ -956,7 +1044,7 @@ def get_log_settings(self, headers=None, as_json=False): raise_error_grpc(rpc_error) def get_system_shared_memory_status( - self, region_name="", headers=None, as_json=False + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint ): """Request system shared memory status from the server. @@ -977,6 +1065,8 @@ def get_system_shared_memory_status( are represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Returns ------- @@ -1000,7 +1090,7 @@ def get_system_shared_memory_status( ) ) response = self._client_stub.SystemSharedMemoryStatus( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -1014,7 +1104,7 @@ def get_system_shared_memory_status( raise_error_grpc(rpc_error) def register_system_shared_memory( - self, name, key, byte_size, offset=0, headers=None + self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxint ): """Request the server to register a system shared memory with the following specification. @@ -1035,6 +1125,8 @@ def register_system_shared_memory( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. + client_timeout: int + Optional timeout for the request Raises ------ @@ -1054,14 +1146,16 @@ def register_system_shared_memory( ) ) self._client_stub.SystemSharedMemoryRegister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print("Registered system shared memory with name '{}'".format(name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def unregister_system_shared_memory(self, name="", headers=None): + def unregister_system_shared_memory( + self, name="", headers=None, client_timeout=sys.maxint + ): """Request the server to unregister a system shared memory with the specified name. @@ -1074,6 +1168,8 @@ def unregister_system_shared_memory(self, name="", headers=None): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. + client_timeout: int + Optional timeout for the request Raises ------ @@ -1091,7 +1187,7 @@ def unregister_system_shared_memory(self, name="", headers=None): ) ) self._client_stub.SystemSharedMemoryUnregister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: if name != "": @@ -1104,7 +1200,7 @@ def unregister_system_shared_memory(self, name="", headers=None): raise_error_grpc(rpc_error) def get_cuda_shared_memory_status( - self, region_name="", headers=None, as_json=False + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint ): """Request cuda shared memory status from the server. @@ -1125,6 +1221,8 @@ def get_cuda_shared_memory_status( are represented as string. 
It is the caller's responsibility to convert these strings back to int64 values as necessary. + client_timeout: int + Optional timeout for the request Returns ------- @@ -1149,7 +1247,7 @@ def get_cuda_shared_memory_status( ) ) response = self._client_stub.CudaSharedMemoryStatus( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -1163,7 +1261,13 @@ def get_cuda_shared_memory_status( raise_error_grpc(rpc_error) def register_cuda_shared_memory( - self, name, raw_handle, device_id, byte_size, headers=None + self, + name, + raw_handle, + device_id, + byte_size, + headers=None, + client_timeout=sys.maxint, ): """Request the server to register a system shared memory with the following specification. @@ -1181,6 +1285,8 @@ def register_cuda_shared_memory( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. + client_timeout: int + Optional timeout for the request Raises ------ @@ -1203,14 +1309,16 @@ def register_cuda_shared_memory( ) ) self._client_stub.CudaSharedMemoryRegister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print("Registered cuda shared memory with name '{}'".format(name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def unregister_cuda_shared_memory(self, name="", headers=None): + def unregister_cuda_shared_memory( + self, name="", headers=None, client_timeout=sys.maxint + ): """Request the server to unregister a cuda shared memory with the specified name. @@ -1223,6 +1331,8 @@ def unregister_cuda_shared_memory(self, name="", headers=None): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. 
+ client_timeout: int + Optional timeout for the request Raises ------ @@ -1240,7 +1350,7 @@ def unregister_cuda_shared_memory(self, name="", headers=None): ) ) self._client_stub.CudaSharedMemoryUnregister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: if name != "": diff --git a/src/python/library/tritonclient/grpc/aio/__init__.py b/src/python/library/tritonclient/grpc/aio/__init__.py index 37414dacb..e90ca298c 100755 --- a/src/python/library/tritonclient/grpc/aio/__init__.py +++ b/src/python/library/tritonclient/grpc/aio/__init__.py @@ -140,7 +140,7 @@ def _get_metadata(self, headers): ) return request_metadata - async def is_server_live(self, headers=None): + async def is_server_live(self, headers=None, client_timeout=sys.maxint): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -148,7 +148,7 @@ async def is_server_live(self, headers=None): if self._verbose: print("is_server_live, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ServerLive( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -156,7 +156,7 @@ async def is_server_live(self, headers=None): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def is_server_ready(self, headers=None): + async def is_server_ready(self, headers=None, timeout=sys.intmax): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -164,7 +164,7 @@ async def is_server_ready(self, headers=None): if self._verbose: print("is_server_ready, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ServerReady( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -172,7 +172,9 @@ async def is_server_ready(self, headers=None): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def is_model_ready(self, model_name, model_version="", headers=None): + async def is_model_ready( + self, model_name, model_version="", headers=None, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -184,7 +186,7 @@ async def is_model_ready(self, model_name, model_version="", headers=None): if self._verbose: print("is_model_ready, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ModelReady( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=sys.intmax ) if self._verbose: print(response) @@ -192,7 +194,9 @@ async def is_model_ready(self, model_name, model_version="", headers=None): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def get_server_metadata(self, headers=None, as_json=False): + async def get_server_metadata( + self, headers=None, as_json=False, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -200,7 +204,7 @@ async def get_server_metadata(self, headers=None, as_json=False): if self._verbose: print("get_server_metadata, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ServerMetadata( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -209,7 +213,12 @@ async def get_server_metadata(self, 
headers=None, as_json=False): raise_error_grpc(rpc_error) async def get_model_metadata( - self, model_name, model_version="", headers=None, as_json=False + self, + model_name, + model_version="", + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -222,7 +231,7 @@ async def get_model_metadata( if self._verbose: print("get_model_metadata, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ModelMetadata( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -231,7 +240,12 @@ async def get_model_metadata( raise_error_grpc(rpc_error) async def get_model_config( - self, model_name, model_version="", headers=None, as_json=False + self, + model_name, + model_version="", + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -244,7 +258,7 @@ async def get_model_config( if self._verbose: print("get_model_config, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ModelConfig( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -252,7 +266,9 @@ async def get_model_config( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def get_model_repository_index(self, headers=None, as_json=False): + async def get_model_repository_index( + self, headers=None, as_json=False, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -264,7 +280,7 @@ async def get_model_repository_index(self, headers=None, as_json=False): ) ) response = await self._client_stub.RepositoryIndex( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -272,7 +288,14 @@ async def get_model_repository_index(self, headers=None, as_json=False): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def load_model(self, model_name, headers=None, config=None, files=None): + async def load_model( + self, + model_name, + headers=None, + config=None, + files=None, + client_timeout=sys.maxint, + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -290,14 +313,20 @@ async def load_model(self, model_name, headers=None, config=None, files=None): for path, content in files.items(): request.parameters[path].bytes_param = content await self._client_stub.RepositoryModelLoad( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print("Loaded model '{}'".format(model_name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def unload_model(self, model_name, headers=None, unload_dependents=False): + async def unload_model( + self, + model_name, + headers=None, + unload_dependents=False, + client_timeout=sys.maxint, + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -306,7 +335,7 @@ async def unload_model(self, model_name, headers=None, unload_dependents=False): if self._verbose: print("unload_model, metadata {}\n{}".format(metadata, request)) await self._client_stub.RepositoryModelUnload( - request=request, metadata=metadata + 
request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print("Unloaded model '{}'".format(model_name)) @@ -314,7 +343,12 @@ async def unload_model(self, model_name, headers=None, unload_dependents=False): raise_error_grpc(rpc_error) async def get_inference_statistics( - self, model_name="", model_version="", headers=None, as_json=False + self, + model_name="", + model_version="", + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -322,7 +356,7 @@ async def get_inference_statistics( if type(model_version) != str: raise_error("model version must be a string") request = service_pb2.ModelStatisticsRequest( - name=model_name, version=model_version + name=model_name, version=model_version, timeout=client_timeout ) if self._verbose: print( @@ -340,7 +374,12 @@ async def get_inference_statistics( raise_error_grpc(rpc_error) async def update_trace_settings( - self, model_name=None, settings={}, headers=None, as_json=False + self, + model_name=None, + settings={}, + headers=None, + as_json=False, + client_timeout=sys.maxint, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -361,7 +400,7 @@ async def update_trace_settings( "update_trace_settings, metadata {}\n{}".format(metadata, request) ) response = await self._client_stub.TraceSetting( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -369,7 +408,9 @@ async def update_trace_settings( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def get_trace_settings(self, model_name=None, headers=None, as_json=False): + async def get_trace_settings( + self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -379,7 +420,7 @@ async def get_trace_settings(self, model_name=None, headers=None, as_json=False) if self._verbose: print("get_trace_settings, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.TraceSetting( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -387,7 +428,9 @@ async def get_trace_settings(self, model_name=None, headers=None, as_json=False) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def update_log_settings(self, settings, headers=None, as_json=False): + async def update_log_settings( + self, settings, headers=None, as_json=False, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -406,7 +449,7 @@ async def update_log_settings(self, settings, headers=None, as_json=False): if self._verbose: print("update_log_settings, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.LogSettings( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -414,7 +457,9 @@ async def update_log_settings(self, settings, headers=None, as_json=False): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def get_log_settings(self, headers=None, as_json=False): + async def get_log_settings( + self, headers=None, as_json=False, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" 
metadata = self._get_metadata(headers) try: @@ -422,7 +467,7 @@ async def get_log_settings(self, headers=None, as_json=False): if self._verbose: print("get_log_settings, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.LogSettings( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -431,7 +476,7 @@ async def get_log_settings(self, headers=None, as_json=False): raise_error_grpc(rpc_error) async def get_system_shared_memory_status( - self, region_name="", headers=None, as_json=False + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -444,7 +489,7 @@ async def get_system_shared_memory_status( ) ) response = await self._client_stub.SystemSharedMemoryStatus( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -453,7 +498,7 @@ async def get_system_shared_memory_status( raise_error_grpc(rpc_error) async def register_system_shared_memory( - self, name, key, byte_size, offset=0, headers=None + self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxint ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -468,14 +513,16 @@ async def register_system_shared_memory( ) ) await self._client_stub.SystemSharedMemoryRegister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print("Registered system shared memory with name '{}'".format(name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def unregister_system_shared_memory(self, name="", headers=None): + async def unregister_system_shared_memory( + self, name="", headers=None, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -487,7 +534,7 @@ async def unregister_system_shared_memory(self, name="", headers=None): ) ) await self._client_stub.SystemSharedMemoryUnregister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: if name != "": @@ -500,7 +547,7 @@ async def unregister_system_shared_memory(self, name="", headers=None): raise_error_grpc(rpc_error) async def get_cuda_shared_memory_status( - self, region_name="", headers=None, as_json=False + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint ): """Refer to tritonclient.grpc.InferenceServerClient""" @@ -514,7 +561,7 @@ async def get_cuda_shared_memory_status( ) ) response = await self._client_stub.CudaSharedMemoryStatus( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -523,7 +570,13 @@ async def get_cuda_shared_memory_status( raise_error_grpc(rpc_error) async def register_cuda_shared_memory( - self, name, raw_handle, device_id, byte_size, headers=None + self, + name, + raw_handle, + device_id, + byte_size, + headers=None, + client_timeout=sys.maxint, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -541,14 +594,16 @@ async def register_cuda_shared_memory( ) ) await self._client_stub.CudaSharedMemoryRegister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if 
self._verbose: print("Registered cuda shared memory with name '{}'".format(name)) except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def unregister_cuda_shared_memory(self, name="", headers=None): + async def unregister_cuda_shared_memory( + self, name="", headers=None, client_timeout=sys.maxint + ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -560,7 +615,7 @@ async def unregister_cuda_shared_memory(self, name="", headers=None): ) ) await self._client_stub.CudaSharedMemoryUnregister( - request=request, metadata=metadata + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: if name != "": From 0300231d19823ad7f18c7653cbab9cf72591a21b Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Thu, 17 Aug 2023 15:29:57 -0700 Subject: [PATCH 03/10] fix doumentation and call pattern since maxint is deprecated --- .../library/tritonclient/grpc/_client.py | 77 ++++++++++--------- .../library/tritonclient/grpc/aio/__init__.py | 43 ++++++----- 2 files changed, 61 insertions(+), 59 deletions(-) diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 15263b2ea..0070e1feb 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -28,6 +28,7 @@ import base64 import struct +import sys import grpc import rapidjson as json @@ -264,7 +265,7 @@ def close(self): self.stop_stream() self._channel.close() - def is_server_live(self, headers=None, client_timeout=sys.maxint): + def is_server_live(self, headers=None, client_timeout=sys.maxsize): """Contact the inference server and get liveness. Parameters @@ -300,7 +301,7 @@ def is_server_live(self, headers=None, client_timeout=sys.maxint): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def is_server_ready(self, headers=None, timeout=sys.intmax): + def is_server_ready(self, headers=None, client_timeout=sys.maxsize): """Contact the inference server and get readiness. Parameters @@ -337,7 +338,7 @@ def is_server_ready(self, headers=None, timeout=sys.intmax): raise_error_grpc(rpc_error) def is_model_ready( - self, model_name, model_version="", headers=None, client_timeout=sys.maxint + self, model_name, model_version="", headers=None, client_timeout=sys.maxsize ): """Contact the inference server and get the readiness of specified model. @@ -353,7 +354,7 @@ def is_model_ready( Optional dictionary specifying additional HTTP headers to include in the request. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -385,7 +386,7 @@ def is_model_ready( raise_error_grpc(rpc_error) def get_server_metadata( - self, headers=None, as_json=False, client_timeout=sys.maxint + self, headers=None, as_json=False, client_timeout=sys.maxsize ): """Contact the inference server and get its metadata. @@ -403,7 +404,7 @@ def get_server_metadata( responsibility to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns @@ -443,7 +444,7 @@ def get_model_metadata( model_version="", headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Contact the inference server and get the metadata for specified model. @@ -467,7 +468,7 @@ def get_model_metadata( to convert these strings back to int64 values as necessary. 
client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -510,7 +511,7 @@ def get_model_config( model_version="", headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Contact the inference server and get the configuration for specified model. @@ -534,7 +535,7 @@ def get_model_config( to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -572,7 +573,7 @@ def get_model_config( raise_error_grpc(rpc_error) def get_model_repository_index( - self, headers=None, as_json=False, client_timeout=sys.maxint + self, headers=None, as_json=False, client_timeout=sys.maxsize ): """Get the index of model repository contents @@ -591,7 +592,7 @@ def get_model_repository_index( to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -629,7 +630,7 @@ def load_model( headers=None, config=None, files=None, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Request the inference server to load or reload specified model. @@ -651,7 +652,7 @@ def load_model( loaded from. If specified, 'config' must be provided to be the model configuration of the override model directory. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ @@ -687,7 +688,7 @@ def unload_model( model_name, headers=None, unload_dependents=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Request the inference server to unload specified model. @@ -701,7 +702,7 @@ def unload_model( unload_dependents : bool Whether the dependents of the model should also be unloaded. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ @@ -729,7 +730,7 @@ def get_inference_statistics( model_version="", headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Get the inference statistics for the specified model name and version. @@ -757,7 +758,7 @@ def get_inference_statistics( to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ @@ -798,7 +799,7 @@ def update_trace_settings( settings={}, headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Update the trace settings for the specified model name, or global trace settings if model name is not given. @@ -827,7 +828,7 @@ def update_trace_settings( to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -873,7 +874,7 @@ def update_trace_settings( raise_error_grpc(rpc_error) def get_trace_settings( - self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxint + self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxsize ): """Get the trace settings for the specified model name, or global trace settings if model name is not given @@ -897,7 +898,7 @@ def get_trace_settings( to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. 
Returns ------- @@ -933,7 +934,7 @@ def get_trace_settings( raise_error_grpc(rpc_error) def update_log_settings( - self, settings, headers=None, as_json=False, client_timeout=sys.maxint + self, settings, headers=None, as_json=False, client_timeout=sys.maxsize ): """Update the global log settings. Returns the log settings after the update. @@ -955,7 +956,7 @@ def update_log_settings( to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- dict or protobuf message @@ -996,7 +997,7 @@ def update_log_settings( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxint): + def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxsize): """Get the global log settings. Parameters ---------- @@ -1013,7 +1014,7 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxin to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- dict or protobuf message @@ -1044,7 +1045,7 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxin raise_error_grpc(rpc_error) def get_system_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize ): """Request system shared memory status from the server. @@ -1066,7 +1067,7 @@ def get_system_shared_memory_status( responsibility to convert these strings back to int64 values as necessary. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -1104,7 +1105,7 @@ def get_system_shared_memory_status( raise_error_grpc(rpc_error) def register_system_shared_memory( - self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxint + self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxsize ): """Request the server to register a system shared memory with the following specification. @@ -1126,7 +1127,7 @@ def register_system_shared_memory( Optional dictionary specifying additional HTTP headers to include in the request. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ @@ -1154,7 +1155,7 @@ def register_system_shared_memory( raise_error_grpc(rpc_error) def unregister_system_shared_memory( - self, name="", headers=None, client_timeout=sys.maxint + self, name="", headers=None, client_timeout=sys.maxsize ): """Request the server to unregister a system shared memory with the specified name. @@ -1169,7 +1170,7 @@ def unregister_system_shared_memory( Optional dictionary specifying additional HTTP headers to include in the request. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ @@ -1200,7 +1201,7 @@ def unregister_system_shared_memory( raise_error_grpc(rpc_error) def get_cuda_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize ): """Request cuda shared memory status from the server. @@ -1222,7 +1223,7 @@ def get_cuda_shared_memory_status( responsibility to convert these strings back to int64 values as necessary. 
client_timeout: int - Optional timeout for the request + Optional timeout for the request. Returns ------- @@ -1267,7 +1268,7 @@ def register_cuda_shared_memory( device_id, byte_size, headers=None, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Request the server to register a system shared memory with the following specification. @@ -1286,7 +1287,7 @@ def register_cuda_shared_memory( Optional dictionary specifying additional HTTP headers to include in the request. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ @@ -1317,7 +1318,7 @@ def register_cuda_shared_memory( raise_error_grpc(rpc_error) def unregister_cuda_shared_memory( - self, name="", headers=None, client_timeout=sys.maxint + self, name="", headers=None, client_timeout=sys.maxsize ): """Request the server to unregister a cuda shared memory with the specified name. @@ -1332,7 +1333,7 @@ def unregister_cuda_shared_memory( Optional dictionary specifying additional HTTP headers to include in the request. client_timeout: int - Optional timeout for the request + Optional timeout for the request. Raises ------ diff --git a/src/python/library/tritonclient/grpc/aio/__init__.py b/src/python/library/tritonclient/grpc/aio/__init__.py index e90ca298c..7ad714031 100755 --- a/src/python/library/tritonclient/grpc/aio/__init__.py +++ b/src/python/library/tritonclient/grpc/aio/__init__.py @@ -27,6 +27,7 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. import base64 +import sys import rapidjson as json from google.protobuf.json_format import MessageToJson @@ -140,7 +141,7 @@ def _get_metadata(self, headers): ) return request_metadata - async def is_server_live(self, headers=None, client_timeout=sys.maxint): + async def is_server_live(self, headers=None, client_timeout=sys.maxsize): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -156,7 +157,7 @@ async def is_server_live(self, headers=None, client_timeout=sys.maxint): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def is_server_ready(self, headers=None, timeout=sys.intmax): + async def is_server_ready(self, headers=None, client_timeout=sys.maxsize): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -173,7 +174,7 @@ async def is_server_ready(self, headers=None, timeout=sys.intmax): raise_error_grpc(rpc_error) async def is_model_ready( - self, model_name, model_version="", headers=None, client_timeout=sys.maxint + self, model_name, model_version="", headers=None, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -186,7 +187,7 @@ async def is_model_ready( if self._verbose: print("is_model_ready, metadata {}\n{}".format(metadata, request)) response = await self._client_stub.ModelReady( - request=request, metadata=metadata, timeout=sys.intmax + request=request, metadata=metadata, timeout=client_timeout ) if self._verbose: print(response) @@ -195,7 +196,7 @@ async def is_model_ready( raise_error_grpc(rpc_error) async def get_server_metadata( - self, headers=None, as_json=False, client_timeout=sys.maxint + self, headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -218,7 +219,7 @@ async def get_model_metadata( model_version="", headers=None, as_json=False, - client_timeout=sys.maxint, + 
client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -245,7 +246,7 @@ async def get_model_config( model_version="", headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -267,7 +268,7 @@ async def get_model_config( raise_error_grpc(rpc_error) async def get_model_repository_index( - self, headers=None, as_json=False, client_timeout=sys.maxint + self, headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -294,7 +295,7 @@ async def load_model( headers=None, config=None, files=None, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -325,7 +326,7 @@ async def unload_model( model_name, headers=None, unload_dependents=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -348,7 +349,7 @@ async def get_inference_statistics( model_version="", headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -379,7 +380,7 @@ async def update_trace_settings( settings={}, headers=None, as_json=False, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -409,7 +410,7 @@ async def update_trace_settings( raise_error_grpc(rpc_error) async def get_trace_settings( - self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxint + self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -429,7 +430,7 @@ async def get_trace_settings( raise_error_grpc(rpc_error) async def update_log_settings( - self, settings, headers=None, as_json=False, client_timeout=sys.maxint + self, settings, headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -458,7 +459,7 @@ async def update_log_settings( raise_error_grpc(rpc_error) async def get_log_settings( - self, headers=None, as_json=False, client_timeout=sys.maxint + self, headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -476,7 +477,7 @@ async def get_log_settings( raise_error_grpc(rpc_error) async def get_system_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -498,7 +499,7 @@ async def get_system_shared_memory_status( raise_error_grpc(rpc_error) async def register_system_shared_memory( - self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxint + self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -521,7 +522,7 @@ 
async def register_system_shared_memory( raise_error_grpc(rpc_error) async def unregister_system_shared_memory( - self, name="", headers=None, client_timeout=sys.maxint + self, name="", headers=None, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -547,7 +548,7 @@ async def unregister_system_shared_memory( raise_error_grpc(rpc_error) async def get_cuda_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxint + self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" @@ -576,7 +577,7 @@ async def register_cuda_shared_memory( device_id, byte_size, headers=None, - client_timeout=sys.maxint, + client_timeout=sys.maxsize, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -602,7 +603,7 @@ async def register_cuda_shared_memory( raise_error_grpc(rpc_error) async def unregister_cuda_shared_memory( - self, name="", headers=None, client_timeout=sys.maxint + self, name="", headers=None, client_timeout=sys.maxsize ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) From 2fd20e701d6906126698ae509ebfc1885bb2af2b Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Thu, 17 Aug 2023 18:02:46 -0700 Subject: [PATCH 04/10] update documentation --- .../library/tritonclient/grpc/_client.py | 201 ++++++++++++------ 1 file changed, 140 insertions(+), 61 deletions(-) diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 0070e1feb..1260dad8e 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -273,8 +273,12 @@ def is_server_live(self, headers=None, client_timeout=sys.maxsize): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -284,7 +288,7 @@ def is_server_live(self, headers=None, client_timeout=sys.maxsize): Raises ------ InferenceServerException - If unable to get liveness. + If unable to get liveness or has timed out. """ metadata = self._get_metadata(headers) @@ -309,9 +313,12 @@ def is_server_ready(self, headers=None, client_timeout=sys.maxsize): headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. - + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- bool @@ -320,7 +327,7 @@ def is_server_ready(self, headers=None, client_timeout=sys.maxsize): Raises ------ InferenceServerException - If unable to get readiness. + If unable to get readiness or has timed out. 
""" metadata = self._get_metadata(headers) @@ -353,8 +360,12 @@ def is_model_ready( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -364,7 +375,7 @@ def is_model_ready( Raises ------ InferenceServerException - If unable to get model readiness. + If unable to get model readiness or has timed out. """ metadata = self._get_metadata(headers) @@ -403,8 +414,12 @@ def get_server_metadata( are represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns @@ -416,7 +431,7 @@ def get_server_metadata( Raises ------ InferenceServerException - If unable to get server metadata. + If unable to get server metadata or has timed out. """ metadata = self._get_metadata(headers) @@ -467,8 +482,12 @@ def get_model_metadata( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -479,7 +498,7 @@ def get_model_metadata( Raises ------ InferenceServerException - If unable to get model metadata. + If unable to get model metadata or has timed out. """ metadata = self._get_metadata(headers) @@ -534,8 +553,12 @@ def get_model_config( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -546,7 +569,7 @@ def get_model_config( Raises ------ InferenceServerException - If unable to get model configuration. + If unable to get model configuration or has timed out. """ metadata = self._get_metadata(headers) @@ -591,8 +614,12 @@ def get_model_repository_index( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. 
The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -651,13 +678,17 @@ def load_model( The files will form the model directory that the model will be loaded from. If specified, 'config' must be provided to be the model configuration of the override model directory. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to load the model. + If unable to load the model or has timed out. """ metadata = self._get_metadata(headers) @@ -701,13 +732,17 @@ def unload_model( headers to include in the request. unload_dependents : bool Whether the dependents of the model should also be unloaded. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to unload the model. + If unable to unload the model or has timed out. """ metadata = self._get_metadata(headers) @@ -757,13 +792,17 @@ def get_inference_statistics( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to get the model inference statistics. + If unable to get the model inference statistics or has timed out. """ metadata = self._get_metadata(headers) @@ -827,8 +866,12 @@ def update_trace_settings( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -839,7 +882,7 @@ def update_trace_settings( Raises ------ InferenceServerException - If unable to update the trace settings. + If unable to update the trace settings or has timed out. """ metadata = self._get_metadata(headers) @@ -897,8 +940,12 @@ def get_trace_settings( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. 
+ client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -909,7 +956,7 @@ def get_trace_settings( Raises ------ InferenceServerException - If unable to get the trace settings. + If unable to get the trace settings or has timed out. """ metadata = self._get_metadata(headers) @@ -955,8 +1002,12 @@ def update_log_settings( represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- dict or protobuf message @@ -965,7 +1016,7 @@ def update_log_settings( Raises ------ InferenceServerException - If unable to update the log settings. + If unable to update the log settings or has timed out. """ metadata = self._get_metadata(headers) try: @@ -1013,8 +1064,12 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxsi represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- dict or protobuf message @@ -1023,7 +1078,7 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxsi Raises ------ InferenceServerException - If unable to get the log settings. + If unable to get the log settings or has timed out. """ metadata = self._get_metadata(headers) try: @@ -1066,8 +1121,12 @@ def get_system_shared_memory_status( are represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -1078,7 +1137,7 @@ def get_system_shared_memory_status( Raises ------ InferenceServerException - If unable to get the status of specified shared memory. + If unable to get the status of specified shared memory or has timed out. """ metadata = self._get_metadata(headers) @@ -1126,13 +1185,17 @@ def register_system_shared_memory( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. 
The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to register the specified system shared memory. + If unable to register the specified system shared memory or has timed out. """ metadata = self._get_metadata(headers) @@ -1169,13 +1232,17 @@ def unregister_system_shared_memory( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to unregister the specified system shared memory region. + If unable to unregister the specified system shared memory region or has timed out. """ metadata = self._get_metadata(headers) @@ -1222,8 +1289,12 @@ def get_cuda_shared_memory_status( are represented as string. It is the caller's responsibility to convert these strings back to int64 values as necessary. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Returns ------- @@ -1234,7 +1305,7 @@ def get_cuda_shared_memory_status( Raises ------ InferenceServerException - If unable to get the status of specified shared memory. + If unable to get the status of specified shared memory or has timed out. """ @@ -1286,13 +1357,17 @@ def register_cuda_shared_memory( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to register the specified cuda shared memory. + If unable to register the specified cuda shared memory or has timed out. """ metadata = self._get_metadata(headers) @@ -1332,13 +1407,17 @@ def unregister_cuda_shared_memory( headers: dict Optional dictionary specifying additional HTTP headers to include in the request. - client_timeout: int - Optional timeout for the request. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort request and raise + InferenceServerExeption with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. Raises ------ InferenceServerException - If unable to unregister the specified cuda shared memory region. + If unable to unregister the specified cuda shared memory region or has timed out. 
""" metadata = self._get_metadata(headers) @@ -1686,7 +1765,7 @@ def start_stream( ------ InferenceServerException If unable to start a stream or a stream was already running - for this client. + for this client or has timed out. """ if self._stream is not None: From 5afa192e263e7f48edea5d78697d9dc19a5781e3 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Wed, 23 Aug 2023 17:08:02 -0700 Subject: [PATCH 05/10] update timeout defaults to none --- src/c++/library/grpc_client.h | 90 +++++++++++-------- .../library/tritonclient/grpc/_client.py | 44 +++++---- .../library/tritonclient/grpc/aio/__init__.py | 42 +++++---- 3 files changed, 94 insertions(+), 82 deletions(-) diff --git a/src/c++/library/grpc_client.h b/src/c++/library/grpc_client.h index 56f1dfa56..c1d84f73d 100644 --- a/src/c++/library/grpc_client.h +++ b/src/c++/library/grpc_client.h @@ -156,8 +156,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param live Returns whether the server is live or not. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error IsServerLive( bool* live, const Headers& headers = Headers(), const int timeout_ms = INT_MAX); @@ -166,8 +167,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param ready Returns whether the server is ready or not. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error IsServerReady( bool* ready, const Headers& headers = Headers(), const int timeout_ms = INT_MAX); @@ -180,8 +182,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error IsModelReady( bool* ready, const std::string& model_name, const std::string& model_version = "", const Headers& headers = Headers(), @@ -192,8 +195,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// SeverMetadataResponse message. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. 
Error ServerMetadata( inference::ServerMetadataResponse* server_metadata, const Headers& headers = Headers(), const int timeout_ms = INT_MAX); @@ -207,8 +211,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error ModelMetadata( inference::ModelMetadataResponse* model_metadata, const std::string& model_name, const std::string& model_version = "", @@ -223,8 +228,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error ModelConfig( inference::ModelConfigResponse* model_config, const std::string& model_name, const std::string& model_version = "", @@ -236,8 +242,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// RepositoryIndexRequestResponse /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error ModelRepositoryIndex( inference::RepositoryIndexResponse* repository_index, const Headers& headers = Headers(), const int timeout_ms = INT_MAX); @@ -254,8 +261,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// The files will form the model directory that the model /// will be loaded from. If specified, 'config' must be provided to be /// the model configuration of the override model directory. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error LoadModel( const std::string& model_name, const Headers& headers = Headers(), const std::string& config = std::string(), @@ -266,8 +274,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param model_name The name of the model to be unloaded. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. 
Error UnloadModel( const std::string& model_name, const Headers& headers = Headers(), const int timeout_ms = INT_MAX); @@ -284,8 +293,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// choose a version based on the model and internal policy. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error ModelInferenceStatistics( inference::ModelStatisticsResponse* infer_stat, const std::string& model_name = "", const std::string& model_version = "", @@ -305,8 +315,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// loading the model. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error UpdateTraceSettings( inference::TraceSettingResponse* response, const std::string& model_name = "", @@ -322,8 +333,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// will be returned. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error GetTraceSettings( inference::TraceSettingResponse* settings, const std::string& model_name = "", const Headers& headers = Headers(), @@ -338,8 +350,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// shared memory will be returned. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusResponse* status, const std::string& region_name = "", const Headers& headers = Headers(), @@ -355,8 +368,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// the start of the system shared memory region. The default value is zero. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. 
\return Error object indicating success or + /// failure of the request Error RegisterSystemSharedMemory( const std::string& name, const std::string& key, const size_t byte_size, const size_t offset = 0, const Headers& headers = Headers(), @@ -369,8 +383,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// unregistered. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request Error UnregisterSystemSharedMemory( const std::string& name = "", const Headers& headers = Headers(), const int timeout_ms = INT_MAX); @@ -384,8 +399,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// shared memory will be returned. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request. + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request. Error CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusResponse* status, const std::string& region_name = "", const Headers& headers = Headers(), @@ -401,8 +417,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// bytes. /// \param headers Optional map specifying additional HTTP headers to /// include in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request Error RegisterCudaSharedMemory( const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle, const size_t device_id, const size_t byte_size, @@ -415,8 +432,9 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// unregistered. /// \param headers Optional map specifying additional HTTP headers to /// include in the metadata of gRPC request. - /// \param timeout_ms Optional timeout for API call. - /// \return Error object indicating success or failure of the request + /// \param timeout_ms Optional timeout for API call, in microseconds, the + /// request is allowed to take. \return Error object indicating success or + /// failure of the request Error UnregisterCudaSharedMemory( const std::string& name = "", const Headers& headers = Headers(), const int timeout_ms = INT_MAX); diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 1260dad8e..5724d6bbe 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -265,7 +265,7 @@ def close(self): self.stop_stream() self._channel.close() - def is_server_live(self, headers=None, client_timeout=sys.maxsize): + def is_server_live(self, headers=None, client_timeout=None): """Contact the inference server and get liveness. 
Parameters @@ -305,7 +305,7 @@ def is_server_live(self, headers=None, client_timeout=sys.maxsize): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def is_server_ready(self, headers=None, client_timeout=sys.maxsize): + def is_server_ready(self, headers=None, client_timeout=None): """Contact the inference server and get readiness. Parameters @@ -345,7 +345,7 @@ def is_server_ready(self, headers=None, client_timeout=sys.maxsize): raise_error_grpc(rpc_error) def is_model_ready( - self, model_name, model_version="", headers=None, client_timeout=sys.maxsize + self, model_name, model_version="", headers=None, client_timeout=None ): """Contact the inference server and get the readiness of specified model. @@ -396,9 +396,7 @@ def is_model_ready( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_server_metadata( - self, headers=None, as_json=False, client_timeout=sys.maxsize - ): + def get_server_metadata(self, headers=None, as_json=False, client_timeout=None): """Contact the inference server and get its metadata. Parameters @@ -459,7 +457,7 @@ def get_model_metadata( model_version="", headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Contact the inference server and get the metadata for specified model. @@ -530,7 +528,7 @@ def get_model_config( model_version="", headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Contact the inference server and get the configuration for specified model. @@ -596,7 +594,7 @@ def get_model_config( raise_error_grpc(rpc_error) def get_model_repository_index( - self, headers=None, as_json=False, client_timeout=sys.maxsize + self, headers=None, as_json=False, client_timeout=None ): """Get the index of model repository contents @@ -657,7 +655,7 @@ def load_model( headers=None, config=None, files=None, - client_timeout=sys.maxsize, + client_timeout=None, ): """Request the inference server to load or reload specified model. @@ -719,7 +717,7 @@ def unload_model( model_name, headers=None, unload_dependents=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Request the inference server to unload specified model. @@ -765,7 +763,7 @@ def get_inference_statistics( model_version="", headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Get the inference statistics for the specified model name and version. @@ -838,7 +836,7 @@ def update_trace_settings( settings={}, headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Update the trace settings for the specified model name, or global trace settings if model name is not given. @@ -917,7 +915,7 @@ def update_trace_settings( raise_error_grpc(rpc_error) def get_trace_settings( - self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxsize + self, model_name=None, headers=None, as_json=False, client_timeout=None ): """Get the trace settings for the specified model name, or global trace settings if model name is not given @@ -981,7 +979,7 @@ def get_trace_settings( raise_error_grpc(rpc_error) def update_log_settings( - self, settings, headers=None, as_json=False, client_timeout=sys.maxsize + self, settings, headers=None, as_json=False, client_timeout=None ): """Update the global log settings. Returns the log settings after the update. 
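With the defaults changed from sys.maxsize to None, leaving client_timeout unset keeps the old behavior (the client waits for the server's response), while a finite value enforces a client-side gRPC deadline. A minimal sketch of calling the updated synchronous client, assuming a Triton server reachable at localhost:8001 and an illustrative model name "simple_model" (neither is part of this patch):

    import tritonclient.grpc as grpcclient
    from tritonclient.utils import InferenceServerException

    # Assumes a Triton gRPC endpoint at localhost:8001.
    client = grpcclient.InferenceServerClient(url="localhost:8001")

    try:
        # A finite deadline aborts the RPC if the server does not answer in 0.5s.
        live = client.is_server_live(client_timeout=0.5)
        trace = client.get_trace_settings(model_name="simple_model", client_timeout=0.5)
        # Omitting client_timeout (default None) imposes no client-side deadline.
        ready = client.is_server_ready()
        print(live, ready, trace)
    except InferenceServerException as e:
        # Per the updated docstrings, a missed deadline surfaces as an
        # InferenceServerException with message "Deadline Exceeded".
        print("request failed or timed out:", e)

    client.close()

As the hunks above show, each wrapper simply forwards the value as the timeout keyword of the underlying stub call, so the deadline applies per RPC rather than per channel.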
@@ -1048,7 +1046,7 @@ def update_log_settings( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxsize): + def get_log_settings(self, headers=None, as_json=False, client_timeout=None): """Get the global log settings. Parameters ---------- @@ -1100,7 +1098,7 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=sys.maxsi raise_error_grpc(rpc_error) def get_system_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize + self, region_name="", headers=None, as_json=False, client_timeout=None ): """Request system shared memory status from the server. @@ -1164,7 +1162,7 @@ def get_system_shared_memory_status( raise_error_grpc(rpc_error) def register_system_shared_memory( - self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxsize + self, name, key, byte_size, offset=0, headers=None, client_timeout=None ): """Request the server to register a system shared memory with the following specification. @@ -1218,7 +1216,7 @@ def register_system_shared_memory( raise_error_grpc(rpc_error) def unregister_system_shared_memory( - self, name="", headers=None, client_timeout=sys.maxsize + self, name="", headers=None, client_timeout=None ): """Request the server to unregister a system shared memory with the specified name. @@ -1268,7 +1266,7 @@ def unregister_system_shared_memory( raise_error_grpc(rpc_error) def get_cuda_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize + self, region_name="", headers=None, as_json=False, client_timeout=None ): """Request cuda shared memory status from the server. @@ -1339,7 +1337,7 @@ def register_cuda_shared_memory( device_id, byte_size, headers=None, - client_timeout=sys.maxsize, + client_timeout=None, ): """Request the server to register a system shared memory with the following specification. @@ -1392,9 +1390,7 @@ def register_cuda_shared_memory( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - def unregister_cuda_shared_memory( - self, name="", headers=None, client_timeout=sys.maxsize - ): + def unregister_cuda_shared_memory(self, name="", headers=None, client_timeout=None): """Request the server to unregister a cuda shared memory with the specified name. 
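The asyncio wrapper accepts the same client_timeout keyword, as the aio/__init__.py hunks below show, so async callers pass the deadline the same way. A sketch under the same assumptions (server at localhost:8001; purely illustrative, not part of the patch):

    import asyncio

    import tritonclient.grpc.aio as grpcclient_aio
    from tritonclient.utils import InferenceServerException


    async def main():
        # Assumes a Triton gRPC endpoint at localhost:8001.
        client = grpcclient_aio.InferenceServerClient(url="localhost:8001")
        try:
            # client_timeout is forwarded as the per-RPC timeout of the async stub call.
            metadata = await client.get_server_metadata(client_timeout=1.0)
            print(metadata)
        except InferenceServerException as e:
            print("metadata request failed or timed out:", e)
        finally:
            await client.close()


    asyncio.run(main())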
diff --git a/src/python/library/tritonclient/grpc/aio/__init__.py b/src/python/library/tritonclient/grpc/aio/__init__.py index 7ad714031..ecf7b95d1 100755 --- a/src/python/library/tritonclient/grpc/aio/__init__.py +++ b/src/python/library/tritonclient/grpc/aio/__init__.py @@ -141,7 +141,7 @@ def _get_metadata(self, headers): ) return request_metadata - async def is_server_live(self, headers=None, client_timeout=sys.maxsize): + async def is_server_live(self, headers=None, client_timeout=None): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -157,7 +157,7 @@ async def is_server_live(self, headers=None, client_timeout=sys.maxsize): except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def is_server_ready(self, headers=None, client_timeout=sys.maxsize): + async def is_server_ready(self, headers=None, client_timeout=None): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -174,7 +174,7 @@ async def is_server_ready(self, headers=None, client_timeout=sys.maxsize): raise_error_grpc(rpc_error) async def is_model_ready( - self, model_name, model_version="", headers=None, client_timeout=sys.maxsize + self, model_name, model_version="", headers=None, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -196,7 +196,7 @@ async def is_model_ready( raise_error_grpc(rpc_error) async def get_server_metadata( - self, headers=None, as_json=False, client_timeout=sys.maxsize + self, headers=None, as_json=False, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -219,7 +219,7 @@ async def get_model_metadata( model_version="", headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -246,7 +246,7 @@ async def get_model_config( model_version="", headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -268,7 +268,7 @@ async def get_model_config( raise_error_grpc(rpc_error) async def get_model_repository_index( - self, headers=None, as_json=False, client_timeout=sys.maxsize + self, headers=None, as_json=False, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -295,7 +295,7 @@ async def load_model( headers=None, config=None, files=None, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -326,7 +326,7 @@ async def unload_model( model_name, headers=None, unload_dependents=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -349,7 +349,7 @@ async def get_inference_statistics( model_version="", headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -380,7 +380,7 @@ async def update_trace_settings( settings={}, headers=None, as_json=False, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -410,7 +410,7 @@ async def 
update_trace_settings( raise_error_grpc(rpc_error) async def get_trace_settings( - self, model_name=None, headers=None, as_json=False, client_timeout=sys.maxsize + self, model_name=None, headers=None, as_json=False, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -430,7 +430,7 @@ async def get_trace_settings( raise_error_grpc(rpc_error) async def update_log_settings( - self, settings, headers=None, as_json=False, client_timeout=sys.maxsize + self, settings, headers=None, as_json=False, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -458,9 +458,7 @@ async def update_log_settings( except grpc.RpcError as rpc_error: raise_error_grpc(rpc_error) - async def get_log_settings( - self, headers=None, as_json=False, client_timeout=sys.maxsize - ): + async def get_log_settings(self, headers=None, as_json=False, client_timeout=None): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) try: @@ -477,7 +475,7 @@ async def get_log_settings( raise_error_grpc(rpc_error) async def get_system_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize + self, region_name="", headers=None, as_json=False, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -499,7 +497,7 @@ async def get_system_shared_memory_status( raise_error_grpc(rpc_error) async def register_system_shared_memory( - self, name, key, byte_size, offset=0, headers=None, client_timeout=sys.maxsize + self, name, key, byte_size, offset=0, headers=None, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -522,7 +520,7 @@ async def register_system_shared_memory( raise_error_grpc(rpc_error) async def unregister_system_shared_memory( - self, name="", headers=None, client_timeout=sys.maxsize + self, name="", headers=None, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -548,7 +546,7 @@ async def unregister_system_shared_memory( raise_error_grpc(rpc_error) async def get_cuda_shared_memory_status( - self, region_name="", headers=None, as_json=False, client_timeout=sys.maxsize + self, region_name="", headers=None, as_json=False, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" @@ -577,7 +575,7 @@ async def register_cuda_shared_memory( device_id, byte_size, headers=None, - client_timeout=sys.maxsize, + client_timeout=None, ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) @@ -603,7 +601,7 @@ async def register_cuda_shared_memory( raise_error_grpc(rpc_error) async def unregister_cuda_shared_memory( - self, name="", headers=None, client_timeout=sys.maxsize + self, name="", headers=None, client_timeout=None ): """Refer to tritonclient.grpc.InferenceServerClient""" metadata = self._get_metadata(headers) From 2ab7c225e337cb9c05b01597d92290e804cb3b37 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Thu, 2 Nov 2023 15:34:36 -0700 Subject: [PATCH 06/10] addressed comments --- src/c++/library/grpc_client.cc | 136 +++++++----------- src/c++/library/grpc_client.h | 90 +++++++----- .../library/tritonclient/grpc/_client.py | 1 - 3 files changed, 109 insertions(+), 118 deletions(-) diff --git a/src/c++/library/grpc_client.cc b/src/c++/library/grpc_client.cc index 
151e9a339..fe91f5c17 100644 --- a/src/c++/library/grpc_client.cc +++ b/src/c++/library/grpc_client.cc @@ -145,6 +145,20 @@ GetStub( return stub; } + +/// Set client timeout +/// +/// \param client_timeout_ms Deadline for timeout in microseconds +/// \param context Client context to add deadline to +void +SetTimeout(const uint64_t& client_timeout_ms, grpc::ClientContext* context) +{ + if (client_timeout_ms != 0) { + auto deadline = std::chrono::system_clock::now() + + std::chrono::microseconds(client_timeout_ms); + context->set_deadline(deadline); + } +} } // namespace //============================================================================== @@ -480,7 +494,7 @@ InferenceServerGrpcClient::Create( Error InferenceServerGrpcClient::IsServerLive( - bool* live, const Headers& headers, const int timeout_ms) + bool* live, const Headers& headers, const uint64_t timeout_ms) { Error err; @@ -488,9 +502,7 @@ InferenceServerGrpcClient::IsServerLive( inference::ServerLiveResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -510,7 +522,7 @@ InferenceServerGrpcClient::IsServerLive( Error InferenceServerGrpcClient::IsServerReady( - bool* ready, const Headers& headers, const int timeout_ms) + bool* ready, const Headers& headers, const uint64_t timeout_ms) { Error err; @@ -518,9 +530,7 @@ InferenceServerGrpcClient::IsServerReady( inference::ServerReadyResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -542,7 +552,7 @@ Error InferenceServerGrpcClient::IsModelReady( bool* ready, const std::string& model_name, const std::string& model_version, const Headers& headers, - const int timeout_ms) + const uint64_t timeout_ms) { Error err; @@ -550,9 +560,7 @@ InferenceServerGrpcClient::IsModelReady( inference::ModelReadyResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -580,7 +588,7 @@ InferenceServerGrpcClient::IsModelReady( Error InferenceServerGrpcClient::ServerMetadata( inference::ServerMetadataResponse* server_metadata, const Headers& headers, - const int timeout_ms) + const uint64_t timeout_ms) { server_metadata->Clear(); Error err; @@ -588,9 +596,7 @@ InferenceServerGrpcClient::ServerMetadata( inference::ServerMetadataRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -613,7 +619,7 @@ Error InferenceServerGrpcClient::ModelMetadata( inference::ModelMetadataResponse* model_metadata, const std::string& model_name, const std::string& model_version, - const Headers& headers, const int timeout_ms) + const Headers& headers, const uint64_t timeout_ms) { model_metadata->Clear(); Error err; @@ -621,9 +627,7 @@ InferenceServerGrpcClient::ModelMetadata( 
inference::ModelMetadataRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -648,7 +652,7 @@ Error InferenceServerGrpcClient::ModelConfig( inference::ModelConfigResponse* model_config, const std::string& model_name, const std::string& model_version, const Headers& headers, - const int timeout_ms) + const uint64_t timeout_ms) { model_config->Clear(); Error err; @@ -656,9 +660,7 @@ InferenceServerGrpcClient::ModelConfig( inference::ModelConfigRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -681,7 +683,7 @@ InferenceServerGrpcClient::ModelConfig( Error InferenceServerGrpcClient::ModelRepositoryIndex( inference::RepositoryIndexResponse* repository_index, - const Headers& headers, const int timeout_ms) + const Headers& headers, const uint64_t timeout_ms) { repository_index->Clear(); Error err; @@ -689,9 +691,7 @@ InferenceServerGrpcClient::ModelRepositoryIndex( inference::RepositoryIndexRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -713,7 +713,8 @@ Error InferenceServerGrpcClient::LoadModel( const std::string& model_name, const Headers& headers, const std::string& config, - const std::map>& files, const int timeout_ms) + const std::map>& files, + const uint64_t timeout_ms) { Error err; @@ -721,9 +722,7 @@ InferenceServerGrpcClient::LoadModel( inference::RepositoryModelLoadResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -751,7 +750,8 @@ InferenceServerGrpcClient::LoadModel( Error InferenceServerGrpcClient::UnloadModel( - const std::string& model_name, const Headers& headers, const int timeout_ms) + const std::string& model_name, const Headers& headers, + const uint64_t timeout_ms) { Error err; @@ -759,9 +759,7 @@ InferenceServerGrpcClient::UnloadModel( inference::RepositoryModelUnloadResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -784,7 +782,7 @@ Error InferenceServerGrpcClient::ModelInferenceStatistics( inference::ModelStatisticsResponse* infer_stat, const std::string& model_name, const std::string& model_version, - const Headers& headers, const int timeout_ms) + const Headers& headers, const uint64_t timeout_ms) { infer_stat->Clear(); Error err; @@ -792,9 +790,7 @@ InferenceServerGrpcClient::ModelInferenceStatistics( inference::ModelStatisticsRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - 
context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -818,15 +814,13 @@ Error InferenceServerGrpcClient::UpdateTraceSettings( inference::TraceSettingResponse* response, const std::string& model_name, const std::map>& settings, - const Headers& headers, const int timeout_ms) + const Headers& headers, const uint64_t timeout_ms) { inference::TraceSettingRequest request; grpc::ClientContext context; Error err; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -861,7 +855,7 @@ InferenceServerGrpcClient::UpdateTraceSettings( Error InferenceServerGrpcClient::GetTraceSettings( inference::TraceSettingResponse* settings, const std::string& model_name, - const Headers& headers, const int timeout_ms) + const Headers& headers, const uint64_t timeout_ms) { settings->Clear(); Error err; @@ -869,9 +863,7 @@ InferenceServerGrpcClient::GetTraceSettings( inference::TraceSettingRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -895,7 +887,7 @@ Error InferenceServerGrpcClient::SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusResponse* status, const std::string& region_name, const Headers& headers, - const int timeout_ms) + const uint64_t timeout_ms) { status->Clear(); Error err; @@ -903,9 +895,7 @@ InferenceServerGrpcClient::SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -927,7 +917,7 @@ InferenceServerGrpcClient::SystemSharedMemoryStatus( Error InferenceServerGrpcClient::RegisterSystemSharedMemory( const std::string& name, const std::string& key, const size_t byte_size, - const size_t offset, const Headers& headers, const int timeout_ms) + const size_t offset, const Headers& headers, const uint64_t timeout_ms) { Error err; @@ -935,9 +925,7 @@ InferenceServerGrpcClient::RegisterSystemSharedMemory( inference::SystemSharedMemoryRegisterResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -962,7 +950,7 @@ InferenceServerGrpcClient::RegisterSystemSharedMemory( Error InferenceServerGrpcClient::UnregisterSystemSharedMemory( - const std::string& name, const Headers& headers, const int timeout_ms) + const std::string& name, const Headers& headers, const uint64_t timeout_ms) { Error err; @@ -970,9 +958,7 @@ InferenceServerGrpcClient::UnregisterSystemSharedMemory( inference::SystemSharedMemoryUnregisterResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { 
context.AddMetadata(it.first, it.second); } @@ -1001,7 +987,7 @@ Error InferenceServerGrpcClient::CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusResponse* status, const std::string& region_name, const Headers& headers, - const int timeout_ms) + const uint64_t timeout_ms) { status->Clear(); Error err; @@ -1009,9 +995,7 @@ InferenceServerGrpcClient::CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusRequest request; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -1034,7 +1018,7 @@ Error InferenceServerGrpcClient::RegisterCudaSharedMemory( const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle, const size_t device_id, const size_t byte_size, const Headers& headers, - const int timeout_ms) + const uint64_t timeout_ms) { Error err; @@ -1042,9 +1026,7 @@ InferenceServerGrpcClient::RegisterCudaSharedMemory( inference::CudaSharedMemoryRegisterResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); } @@ -1069,7 +1051,7 @@ InferenceServerGrpcClient::RegisterCudaSharedMemory( Error InferenceServerGrpcClient::UnregisterCudaSharedMemory( - const std::string& name, const Headers& headers, const int timeout_ms) + const std::string& name, const Headers& headers, const uint64_t timeout_ms) { Error err; @@ -1077,9 +1059,7 @@ InferenceServerGrpcClient::UnregisterCudaSharedMemory( inference::CudaSharedMemoryUnregisterResponse response; grpc::ClientContext context; - auto deadline = - std::chrono::system_clock::now() + std::chrono::microseconds(timeout_ms); - context.set_deadline(deadline); + SetTimeout(timeout_ms, &context); for (const auto& it : headers) { context.AddMetadata(it.first, it.second); @@ -1127,9 +1107,7 @@ InferenceServerGrpcClient::Infer( } if (options.client_timeout_ != 0) { - auto deadline = std::chrono::system_clock::now() + - std::chrono::microseconds(options.client_timeout_); - context.set_deadline(deadline); + SetTimeout(options.client_timeout_, &context); } context.set_compression_algorithm(compression_algorithm); @@ -1191,9 +1169,7 @@ InferenceServerGrpcClient::AsyncInfer( } if (options.client_timeout_ != 0) { - auto deadline = std::chrono::system_clock::now() + - std::chrono::microseconds(options.client_timeout_); - async_request->grpc_context_.set_deadline(deadline); + SetTimeout(options.client_timeout_, &(async_request->grpc_context_)); } async_request->grpc_context_.set_compression_algorithm(compression_algorithm); @@ -1363,9 +1339,7 @@ InferenceServerGrpcClient::StartStream( } if (stream_timeout != 0) { - auto deadline = std::chrono::system_clock::now() + - std::chrono::microseconds(stream_timeout); - grpc_context_.set_deadline(deadline); + SetTimeout(stream_timeout, &grpc_context_); } grpc_context_.set_compression_algorithm(compression_algorithm); diff --git a/src/c++/library/grpc_client.h b/src/c++/library/grpc_client.h index c1d84f73d..cc90b12de 100644 --- a/src/c++/library/grpc_client.h +++ b/src/c++/library/grpc_client.h @@ -157,22 +157,24 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the 
metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error IsServerLive( bool* live, const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Contact the inference server and get its readiness. /// \param ready Returns whether the server is ready or not. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error IsServerReady( bool* ready, const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Contact the inference server and get the readiness of specified model. /// \param ready Returns whether the specified model is ready or not. @@ -183,12 +185,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error IsModelReady( bool* ready, const std::string& model_name, const std::string& model_version = "", const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Contact the inference server and get its metadata. /// \param server_metadata Returns the server metadata as @@ -196,11 +199,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error ServerMetadata( inference::ServerMetadataResponse* server_metadata, - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Contact the inference server and get the metadata of specified model. /// \param model_metadata Returns model metadata as ModelMetadataResponse @@ -212,12 +216,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. 
Error ModelMetadata( inference::ModelMetadataResponse* model_metadata, const std::string& model_name, const std::string& model_version = "", - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Contact the inference server and get the configuration of specified model. /// \param model_config Returns model config as ModelConfigResponse @@ -229,12 +234,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error ModelConfig( inference::ModelConfigResponse* model_config, const std::string& model_name, const std::string& model_version = "", - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Contact the inference server and get the index of model repository /// contents. @@ -243,11 +249,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error ModelRepositoryIndex( inference::RepositoryIndexResponse* repository_index, - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Request the inference server to load or reload specified model. /// \param model_name The name of the model to be loaded or reloaded. @@ -262,24 +269,26 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// will be loaded from. If specified, 'config' must be provided to be /// the model configuration of the override model directory. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error LoadModel( const std::string& model_name, const Headers& headers = Headers(), const std::string& config = std::string(), const std::map>& files = {}, - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Request the inference server to unload specified model. /// \param model_name The name of the model to be unloaded. /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error UnloadModel( const std::string& model_name, const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Contact the inference server and get the inference statistics for the /// specified model name and version. 
@@ -294,12 +303,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error ModelInferenceStatistics( inference::ModelStatisticsResponse* infer_stat, const std::string& model_name = "", const std::string& model_version = "", - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Update the trace settings for the specified model name, or global trace /// settings if model name is not given. @@ -316,14 +326,15 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error UpdateTraceSettings( inference::TraceSettingResponse* response, const std::string& model_name = "", const std::map>& settings = std::map>(), - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Get the trace settings for the specified model name, or global trace /// settings if model name is not given. @@ -334,12 +345,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error GetTraceSettings( inference::TraceSettingResponse* settings, const std::string& model_name = "", const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Contact the inference server and get the status for requested system /// shared memory. @@ -351,12 +363,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error SystemSharedMemoryStatus( inference::SystemSharedMemoryStatusResponse* status, const std::string& region_name = "", const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Request the server to register a system shared memory with the provided /// details. @@ -369,12 +382,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. 
/// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request Error RegisterSystemSharedMemory( const std::string& name, const std::string& key, const size_t byte_size, const size_t offset = 0, const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Request the server to unregister a system shared memory with the /// specified name. @@ -384,11 +398,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request Error UnregisterSystemSharedMemory( const std::string& name = "", const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Contact the inference server and get the status for requested CUDA /// shared memory. @@ -400,12 +415,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to include /// in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request. Error CudaSharedMemoryStatus( inference::CudaSharedMemoryStatusResponse* status, const std::string& region_name = "", const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Request the server to register a CUDA shared memory with the provided /// details. @@ -418,12 +434,13 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to /// include in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. + /// \return Error object indicating success or /// failure of the request Error RegisterCudaSharedMemory( const std::string& name, const cudaIpcMemHandle_t& cuda_shm_handle, const size_t device_id, const size_t byte_size, - const Headers& headers = Headers(), const int timeout_ms = INT_MAX); + const Headers& headers = Headers(), const uint64_t timeout_ms = 0); /// Request the server to unregister a CUDA shared memory with the /// specified name. @@ -433,11 +450,12 @@ class InferenceServerGrpcClient : public InferenceServerClient { /// \param headers Optional map specifying additional HTTP headers to /// include in the metadata of gRPC request. /// \param timeout_ms Optional timeout for API call, in microseconds, the - /// request is allowed to take. \return Error object indicating success or + /// request is allowed to take. 
+ /// \return Error object indicating success or /// failure of the request Error UnregisterCudaSharedMemory( const std::string& name = "", const Headers& headers = Headers(), - const int timeout_ms = INT_MAX); + const uint64_t timeout_ms = 0); /// Run synchronous inference on server. /// \param result Returns the result of inference. diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 5724d6bbe..4b8488dd4 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -28,7 +28,6 @@ import base64 import struct -import sys import grpc import rapidjson as json From 2302ba89ea57b4e51ddb3fff12955a17679dcf23 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Mon, 6 Nov 2023 18:28:40 -0800 Subject: [PATCH 07/10] add timeout test --- src/c++/tests/client_timeout_test.cc | 54 ++++++++++++++++++++-------- 1 file changed, 40 insertions(+), 14 deletions(-) diff --git a/src/c++/tests/client_timeout_test.cc b/src/c++/tests/client_timeout_test.cc index 71226da53..d38af72d7 100644 --- a/src/c++/tests/client_timeout_test.cc +++ b/src/c++/tests/client_timeout_test.cc @@ -47,6 +47,22 @@ namespace tc = triton::client; namespace { +void +TestTimeoutAPIs( + const uint64_t timeout_ms, const std::string& name, + std::unique_ptr& grpc_client) +{ + std::cout << "testing other apis" << std::endl; + std::map headers; + FAIL_IF_ERR( + grpc_client->LoadModel(name, headers, "", {}, timeout_ms), + "Could not load model"); + bool isReady = true; + FAIL_IF_ERR( + grpc_client->IsModelReady(&isReady, name, "", headers, timeout_ms), + "Could not get model ready information"); +} + void ValidateShapeAndDatatype( const std::string& name, std::shared_ptr result) @@ -109,11 +125,11 @@ void RunSynchronousInference( std::unique_ptr& grpc_client, std::unique_ptr& http_client, - uint32_t client_timeout, std::vector& inputs, + uint32_t client_timeout_ms, std::vector& inputs, std::vector& outputs, tc::InferOptions& options, std::vector& input0_data) { - options.client_timeout_ = client_timeout; + options.client_timeout_ = client_timeout_ms; tc::InferResult* results; if (grpc_client.get() != nullptr) { FAIL_IF_ERR( @@ -141,7 +157,7 @@ void RunAsynchronousInference( std::unique_ptr& grpc_client, std::unique_ptr& http_client, - uint32_t client_timeout, std::vector& inputs, + uint32_t client_timeout_ms, std::vector& inputs, std::vector& outputs, tc::InferOptions& options, std::vector& input0_data) { @@ -167,7 +183,7 @@ RunAsynchronousInference( cv.notify_all(); }; - options.client_timeout_ = client_timeout; + options.client_timeout_ = client_timeout_ms; if (grpc_client.get() != nullptr) { FAIL_IF_ERR( grpc_client->AsyncInfer(callback, options, inputs, outputs), @@ -188,7 +204,7 @@ RunAsynchronousInference( void RunStreamingInference( std::unique_ptr& grpc_client, - uint32_t client_timeout, std::vector& inputs, + uint32_t client_timeout_ms, std::vector& inputs, std::vector& outputs, tc::InferOptions& options, std::vector& input0_data) { @@ -206,13 +222,13 @@ RunStreamingInference( } cv.notify_all(); }, - false /*ship_stats*/, client_timeout), + false /*ship_stats*/, client_timeout_ms), "Failed to start the stream"); FAIL_IF_ERR( grpc_client->AsyncStreamInfer(options, inputs), "unable to run model"); - auto timeout = std::chrono::microseconds(client_timeout); + auto timeout = std::chrono::microseconds(client_timeout_ms); // Wait until all callbacks are invoked or the timeout expires { std::unique_lock lk(mtx); @@ -263,11 
+279,12 @@ main(int argc, char** argv) std::string url; bool async = false; bool streaming = false; - uint32_t client_timeout = 0; + uint32_t client_timeout_ms = 0; + bool test_client_apis = false; // Parse commandline... int opt; - while ((opt = getopt(argc, argv, "vi:u:ast:")) != -1) { + while ((opt = getopt(argc, argv, "vi:u:ast:p")) != -1) { switch (opt) { case 'v': verbose = true; @@ -292,7 +309,10 @@ main(int argc, char** argv) streaming = true; break; case 't': - client_timeout = std::stoi(optarg); + client_timeout_ms = std::stoi(optarg); + break; + case 'p': + test_client_apis = true; break; case '?': Usage(argv); @@ -335,6 +355,12 @@ main(int argc, char** argv) "unable to create grpc client"); } + // Test server timeouts for grpc client + if (protocol == "grpc" && test_client_apis) { + TestTimeoutAPIs(client_timeout_ms, model_name, grpc_client); + return 0; + } + // Initialize the tensor data std::vector input0_data(16); for (size_t i = 0; i < 16; ++i) { @@ -370,7 +396,7 @@ main(int argc, char** argv) // The inference settings. Will be using default for now. tc::InferOptions options(model_name); options.model_version_ = model_version; - options.client_timeout_ = client_timeout; + options.client_timeout_ = client_timeout_ms; std::vector inputs = {input0_ptr.get()}; std::vector outputs = {output0_ptr.get()}; @@ -378,14 +404,14 @@ main(int argc, char** argv) // Send inference request to the inference server. if (streaming) { RunStreamingInference( - grpc_client, client_timeout, inputs, outputs, options, input0_data); + grpc_client, client_timeout_ms, inputs, outputs, options, input0_data); } else if (async) { RunAsynchronousInference( - grpc_client, http_client, client_timeout, inputs, outputs, options, + grpc_client, http_client, client_timeout_ms, inputs, outputs, options, input0_data); } else { RunSynchronousInference( - grpc_client, http_client, client_timeout, inputs, outputs, options, + grpc_client, http_client, client_timeout_ms, inputs, outputs, options, input0_data); } From d58bd4bdb785bac18b4e2b7ce049114655a045bf Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Wed, 8 Nov 2023 18:49:10 -0800 Subject: [PATCH 08/10] finalize testing --- src/c++/tests/client_timeout_test.cc | 106 ++++++++++++++++-- .../library/tritonclient/grpc/_client.py | 6 + 2 files changed, 104 insertions(+), 8 deletions(-) diff --git a/src/c++/tests/client_timeout_test.cc b/src/c++/tests/client_timeout_test.cc index d38af72d7..91a8e2796 100644 --- a/src/c++/tests/client_timeout_test.cc +++ b/src/c++/tests/client_timeout_test.cc @@ -45,22 +45,112 @@ namespace tc = triton::client; } \ } +#define COUNT_ERROR_MSGS(X, MSG, CNT) \ + { \ + tc::Error err = (X); \ + if (!err.IsOk()) { \ + std::cout << "error: " << (MSG) << ": " << err << std::endl; \ + ++CNT; \ + } \ + } + namespace { void TestTimeoutAPIs( - const uint64_t timeout_ms, const std::string& name, + const uint64_t timeout_ms, const std::string& model_name, std::unique_ptr& grpc_client) { std::cout << "testing other apis" << std::endl; + bool success = false; std::map headers; - FAIL_IF_ERR( - grpc_client->LoadModel(name, headers, "", {}, timeout_ms), - "Could not load model"); - bool isReady = true; - FAIL_IF_ERR( - grpc_client->IsModelReady(&isReady, name, "", headers, timeout_ms), - "Could not get model ready information"); + inference::ServerMetadataResponse server_metadata; + inference::ModelMetadataResponse model_metadata; + inference::ModelConfigResponse model_config; + inference::RepositoryIndexResponse repository_index; + 
inference::ModelStatisticsResponse infer_stat; + inference::TraceSettingResponse response; + std::map> settings; + inference::TraceSettingResponse trace_settings; + inference::SystemSharedMemoryStatusResponse shmstatus; + size_t byte_size; + std::string memory_name = ""; + inference::CudaSharedMemoryStatusResponse cuda_shmstatus; + cudaIpcMemHandle_t cuda_shm_handle; + size_t count = 0; + + COUNT_ERROR_MSGS( + grpc_client->IsServerLive(&success, headers, timeout_ms), + "Failed on IsServerLive", count); + COUNT_ERROR_MSGS( + grpc_client->IsServerReady(&success, headers, timeout_ms), + "Failed on IsServerReady", count); + COUNT_ERROR_MSGS( + grpc_client->IsModelReady(&success, model_name, "", headers, timeout_ms), + "Failed on IsModelReady", count); + COUNT_ERROR_MSGS( + grpc_client->ServerMetadata(&server_metadata, headers, timeout_ms), + "Failed on ServerMetadata", count); + COUNT_ERROR_MSGS( + grpc_client->ModelMetadata( + &model_metadata, model_name, "", headers, timeout_ms), + "Failed on ModelMetadata", count); + COUNT_ERROR_MSGS( + grpc_client->ModelConfig( + &model_config, model_name, "", headers, timeout_ms), + "Failed on ModelConfig", count); + COUNT_ERROR_MSGS( + grpc_client->ModelRepositoryIndex(&repository_index, headers, timeout_ms), + "Failed on ModelRepositoryIndex", count); + COUNT_ERROR_MSGS( + grpc_client->ModelInferenceStatistics( + &infer_stat, model_name, "", headers, timeout_ms), + "Failed on ModelInferenceStatistics", count); + COUNT_ERROR_MSGS( + grpc_client->LoadModel(model_name, headers, "", {}, timeout_ms), + "Failed on LoadModel", count); + COUNT_ERROR_MSGS( + grpc_client->UnloadModel(model_name, headers, timeout_ms), + "Failed on UnloadModel", count); + + COUNT_ERROR_MSGS( + grpc_client->UpdateTraceSettings( + &response, model_name, settings, headers, timeout_ms), + "Failed on UpdateTraceSettings", count); + COUNT_ERROR_MSGS( + grpc_client->GetTraceSettings( + &trace_settings, model_name, headers, timeout_ms), + "Failed on GetTraceSettings", count); + COUNT_ERROR_MSGS( + grpc_client->SystemSharedMemoryStatus( + &shmstatus, memory_name, headers, timeout_ms), + "Failed on SystemSharedMemoryStatus", count); + COUNT_ERROR_MSGS( + grpc_client->RegisterSystemSharedMemory( + memory_name, memory_name, byte_size, 0, headers, timeout_ms), + "Failed on RegisterSystemSharedMemory", count); + COUNT_ERROR_MSGS( + grpc_client->UnregisterSystemSharedMemory( + memory_name, headers, timeout_ms), + "Failed on UnregisterSystemSharedMemory", count); + COUNT_ERROR_MSGS( + grpc_client->CudaSharedMemoryStatus( + &cuda_shmstatus, "", headers, timeout_ms), + "Failed on CudaSharedMemoryStatus", count); + COUNT_ERROR_MSGS( + grpc_client->RegisterCudaSharedMemory( + model_name, cuda_shm_handle, 0, byte_size, headers, timeout_ms), + "Failed on RegisterCudaSharedMemory", count); + COUNT_ERROR_MSGS( + grpc_client->UnregisterCudaSharedMemory(memory_name, headers, timeout_ms), + "Failed on UnregisterSystemSharedMemory", count); + if (count > 0 && count == 18) { + std::cerr << "error count: " << count + << " which is not 0 nor expected number of APIs that are " + "expected to fail (18)" + << std::endl; + exit(1); + } } void diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 4b8488dd4..90904acf1 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -1644,6 +1644,12 @@ def async_infer( error with message "Deadline Exceeded" in the callback when the specified time elapses. 
The default value is None which means client will wait for the response from the server. + client_timeout: float + The maximum end-to-end time, in seconds, the request is allowed + to take. The client will abort the request and raise + InferenceServerException with message "Deadline Exceeded" when the + specified time elapses. The default value is None which means + client will wait for the response from the server. headers: dict Optional dictionary specifying additional HTTP headers to include in the request. From 3c1a7debc11367a9a0f21f650448257af709ab68 Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Thu, 9 Nov 2023 10:08:18 -0800 Subject: [PATCH 09/10] fix logic --- src/c++/tests/client_timeout_test.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c++/tests/client_timeout_test.cc b/src/c++/tests/client_timeout_test.cc index 91a8e2796..13e6ab5ec 100644 --- a/src/c++/tests/client_timeout_test.cc +++ b/src/c++/tests/client_timeout_test.cc @@ -144,7 +144,7 @@ TestTimeoutAPIs( COUNT_ERROR_MSGS( grpc_client->UnregisterCudaSharedMemory(memory_name, headers, timeout_ms), "Failed on UnregisterSystemSharedMemory", count); - if (count > 0 && count == 18) { + if (count > 0 && count != 18) { std::cerr << "error count: " << count << " which is not 0 nor expected number of APIs that are " "expected to fail (18)" << std::endl; exit(1); } } From f3ce4e1f2feb61426fb090424e31433da9c5fb5e Mon Sep 17 00:00:00 2001 From: Katherine Yang Date: Thu, 9 Nov 2023 15:54:30 -0800 Subject: [PATCH 10/10] addressed more comments --- src/c++/tests/client_timeout_test.cc | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/src/c++/tests/client_timeout_test.cc b/src/c++/tests/client_timeout_test.cc index 13e6ab5ec..e0d8d002a 100644 --- a/src/c++/tests/client_timeout_test.cc +++ b/src/c++/tests/client_timeout_test.cc @@ -144,11 +144,8 @@ TestTimeoutAPIs( COUNT_ERROR_MSGS( grpc_client->UnregisterCudaSharedMemory(memory_name, headers, timeout_ms), "Failed on UnregisterSystemSharedMemory", count); - if (count > 0 && count != 18) { + if (count != 0) { std::cerr << "error count: " << count << " which is not 0 nor expected number of APIs that are " "expected to fail (18)" << std::endl; + std::cerr << "error count: " << count << " which is not 0 " << std::endl; exit(1); } }
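For illustration, below is a minimal caller-side sketch of the per-call timeout added by this series. The server URL and the 100 ms value are placeholders rather than part of the patch; note that, as in the SetTimeout() helper above, the timeout_ms argument is converted with std::chrono::microseconds, and the default of 0 leaves the gRPC deadline unset (the Python clients express the same default as client_timeout=None).

#include <iostream>
#include <memory>

#include "grpc_client.h"

namespace tc = triton::client;

int
main()
{
  // Placeholder endpoint; any reachable Triton gRPC endpoint works here.
  std::unique_ptr<tc::InferenceServerGrpcClient> client;
  tc::Error err =
      tc::InferenceServerGrpcClient::Create(&client, "localhost:8001");
  if (!err.IsOk()) {
    std::cerr << "unable to create grpc client: " << err << std::endl;
    return 1;
  }

  tc::Headers headers;               // no extra request metadata
  const uint64_t timeout = 100000;   // 100 ms, passed in microseconds
  bool live = false;

  // With a non-zero timeout the call fails with "Deadline Exceeded" if the
  // server does not answer in time; with the default of 0 it blocks as before.
  err = client->IsServerLive(&live, headers, timeout);
  if (!err.IsOk()) {
    std::cerr << "IsServerLive failed: " << err << std::endl;
    return 1;
  }
  std::cout << "server live: " << live << std::endl;
  return 0;
}

The same pattern applies to the other APIs touched in this series (LoadModel, ModelMetadata, the shared-memory calls, and so on), since they all set their deadline through the shared SetTimeout() helper.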