diff --git a/src/python/library/tritonclient/_plugin.py b/src/python/library/tritonclient/_plugin.py index 9353b6730..037964874 100755 --- a/src/python/library/tritonclient/_plugin.py +++ b/src/python/library/tritonclient/_plugin.py @@ -30,7 +30,8 @@ class InferenceServerClientPlugin(ABC): """Every Triton Client Plugin should extend this class. - Each plugin needs to implement the `__call__` method. + Each plugin needs to implement the :py:meth:`__call__` method. + """ @abstractmethod @@ -42,5 +43,6 @@ def __call__(self, request): ---------- request : Request The request object. + """ pass diff --git a/src/python/library/tritonclient/_request.py b/src/python/library/tritonclient/_request.py index 09d574497..6aea22832 100755 --- a/src/python/library/tritonclient/_request.py +++ b/src/python/library/tritonclient/_request.py @@ -29,7 +29,7 @@ class Request: """A request object. - Attributes + Parameters ---------- headers : dict A dictionary containing the request headers. diff --git a/src/python/library/tritonclient/grpc/__init__.py b/src/python/library/tritonclient/grpc/__init__.py index 852d5f0d6..5a0d02176 100755 --- a/src/python/library/tritonclient/grpc/__init__.py +++ b/src/python/library/tritonclient/grpc/__init__.py @@ -32,6 +32,7 @@ from tritonclient.utils import * from .._plugin import InferenceServerClientPlugin + from .._request import Request from ._client import MAX_GRPC_MESSAGE_SIZE, InferenceServerClient, KeepAliveOptions from ._infer_input import InferInput from ._infer_result import InferResult @@ -59,3 +60,14 @@ "use versions <1.43.0 or >=1.51.1 to avoid leaks " "(see https://github.com/grpc/grpc/issues/28513)." 
) + +__all__ = [ + "InferenceServerClientPlugin", + "Request", + "InferenceServerClient", + "InferInput", + "InferRequestedOutput", + "InferResult", + "KeepAliveOptions", + "InferenceServerException" +] diff --git a/src/python/library/tritonclient/grpc/_client.py b/src/python/library/tritonclient/grpc/_client.py index 90904acf1..1c68115c4 100755 --- a/src/python/library/tritonclient/grpc/_client.py +++ b/src/python/library/tritonclient/grpc/_client.py @@ -982,6 +982,7 @@ def update_log_settings( ): """Update the global log settings. Returns the log settings after the update. + Parameters ---------- settings: dict @@ -1005,11 +1006,13 @@ def update_log_settings( InferenceServerExeption with message "Deadline Exceeded" when the specified time elapses. The default value is None which means client will wait for the response from the server. + Returns ------- dict or protobuf message The JSON dict or LogSettingsResponse message holding the updated log settings. + Raises ------ InferenceServerException @@ -1047,6 +1050,7 @@ def update_log_settings( def get_log_settings(self, headers=None, as_json=False, client_timeout=None): """Get the global log settings. + Parameters ---------- headers: dict @@ -1067,15 +1071,18 @@ def get_log_settings(self, headers=None, as_json=False, client_timeout=None): InferenceServerExeption with message "Deadline Exceeded" when the specified time elapses. The default value is None which means client will wait for the response from the server. + Returns ------- dict or protobuf message The JSON dict or LogSettingsResponse message holding the log settings. + Raises ------ InferenceServerException If unable to get the log settings or has timed out. + """ metadata = self._get_metadata(headers) try: @@ -1460,14 +1467,14 @@ def infer( model_name: str The name of the model to run inference. 
inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version : str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id : str @@ -1590,12 +1597,12 @@ def async_infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. callback : function Python function that is invoked once the request is completed. The function must reserve the last two arguments (result, error) - to hold InferResult and InferenceServerException + to hold :py:class:`InferResult` and :py:class:`InferenceServerException` objects respectively which will be provided to the function when executing the callback. The ownership of these objects will be given to the user. The 'error' would be None for a successful inference. @@ -1604,7 +1611,7 @@ def async_infer( is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. 
request_id : str @@ -1668,13 +1675,13 @@ def async_infer( Computations represented by a Future may be yet to be begun, ongoing, or have already completed. + Note + ---- This object can be used to cancel the inference request like below: - ---------- - future = async_infer(...) - ret = future.cancel() - ---------- + >>> future = async_infer(...) + >>> ret = future.cancel() Raises ------ @@ -1745,8 +1752,8 @@ def start_stream( callback : function Python function that is invoked upon receiving response from the underlying stream. The function must reserve the last two - arguments (result, error) to hold InferResult and - InferenceServerException objects respectively + arguments (result, error) to hold :py:class:`InferResult` and + :py:class:`InferenceServerException` objects respectively which will be provided to the function when executing the callback. The ownership of these objects will be given to the user. The 'error' would be None for a successful inference. @@ -1830,14 +1837,14 @@ def async_stream_infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version: str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. 
request_id : str diff --git a/src/python/library/tritonclient/grpc/_infer_result.py b/src/python/library/tritonclient/grpc/_infer_result.py index 2369d3bcf..ff3275b02 100755 --- a/src/python/library/tritonclient/grpc/_infer_result.py +++ b/src/python/library/tritonclient/grpc/_infer_result.py @@ -32,7 +32,7 @@ class InferResult: - """An object of InferResult class holds the response of + """An object of :py:class:`InferResult` class holds the response of an inference request and provide methods to retrieve inference results. diff --git a/src/python/library/tritonclient/grpc/_infer_stream.py b/src/python/library/tritonclient/grpc/_infer_stream.py index 74e668d5e..92ec05890 100755 --- a/src/python/library/tritonclient/grpc/_infer_stream.py +++ b/src/python/library/tritonclient/grpc/_infer_stream.py @@ -45,11 +45,13 @@ class _InferStream: callback : function Python function that is invoked upon receiving response from the underlying stream. The function must reserve the last two - arguments (result, error) to hold InferResult and - InferenceServerException objects respectively which will be + arguments (result, error) to hold :py:class:`InferResult` and + :py:class:`InferenceServerException` objects respectively which will be provided to the function when executing the callback. The ownership of these objects will be given to the user. The 'error' would be None for a successful inference. + verbose : bool + Enables verbose mode if set True. 
""" def __init__(self, callback, verbose): diff --git a/src/python/library/tritonclient/grpc/_requested_output.py b/src/python/library/tritonclient/grpc/_requested_output.py index e2d446c58..442093c15 100755 --- a/src/python/library/tritonclient/grpc/_requested_output.py +++ b/src/python/library/tritonclient/grpc/_requested_output.py @@ -31,7 +31,7 @@ class InferRequestedOutput: - """An object of InferRequestedOutput class is used to describe a + """An object of :py:class:`InferRequestedOutput` class is used to describe a requested output tensor for an inference request. Parameters @@ -89,7 +89,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): def unset_shared_memory(self): """Clears the shared memory option set by the last call to - InferRequestedOutput.set_shared_memory(). After call to this + :py:meth:`InferRequestedOutput.set_shared_memory()`. After call to this function requested output will no longer be returned in a shared memory region. """ diff --git a/src/python/library/tritonclient/grpc/_utils.py b/src/python/library/tritonclient/grpc/_utils.py index 1ea8450d2..8dd48f785 100755 --- a/src/python/library/tritonclient/grpc/_utils.py +++ b/src/python/library/tritonclient/grpc/_utils.py @@ -32,7 +32,7 @@ def get_error_grpc(rpc_error): - """Convert a gRPC error to an InferenceServerException. + """Convert a gRPC error to an :py:class:`InferenceServerException`. Parameters ---------- @@ -51,7 +51,7 @@ def get_error_grpc(rpc_error): def get_cancelled_error(msg=None): - """Get InferenceServerException object for a cancelled RPC. + """Get :py:class:`InferenceServerException` object for a cancelled RPC. Returns ------- @@ -63,7 +63,7 @@ def get_cancelled_error(msg=None): def raise_error_grpc(rpc_error): - """Raise an InferenceServerException from a gRPC error. + """Raise an :py:class:`InferenceServerException` from a gRPC error. 
Parameters ---------- diff --git a/src/python/library/tritonclient/grpc/aio/__init__.py b/src/python/library/tritonclient/grpc/aio/__init__.py index 4a2067530..3b4c949df 100755 --- a/src/python/library/tritonclient/grpc/aio/__init__.py +++ b/src/python/library/tritonclient/grpc/aio/__init__.py @@ -43,7 +43,7 @@ class InferenceServerClient(InferenceServerClientBase): """This feature is currently in beta and may be subject to change. - An analogy of the tritonclient.grpc.InferenceServerClient to enable + An analogy of the :py:class:`tritonclient.grpc.InferenceServerClient` to enable calling via asyncio syntax. The object is intended to be used by a single thread and simultaneously calling methods with different threads is not supported and can cause undefined behavior. @@ -142,7 +142,7 @@ def _get_metadata(self, headers): return request_metadata async def is_server_live(self, headers=None, client_timeout=None): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_server_live`""" metadata = self._get_metadata(headers) try: request = service_pb2.ServerLiveRequest() @@ -158,7 +158,7 @@ async def is_server_live(self, headers=None, client_timeout=None): raise_error_grpc(rpc_error) async def is_server_ready(self, headers=None, client_timeout=None): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_server_ready`""" metadata = self._get_metadata(headers) try: request = service_pb2.ServerReadyRequest() @@ -176,7 +176,7 @@ async def is_server_ready(self, headers=None, client_timeout=None): async def is_model_ready( self, model_name, model_version="", headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.is_model_ready`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -198,7 +198,7 @@ async def 
is_model_ready( async def get_server_metadata( self, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_server_metadata`""" metadata = self._get_metadata(headers) try: request = service_pb2.ServerMetadataRequest() @@ -221,7 +221,7 @@ async def get_model_metadata( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_metadata`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -248,7 +248,7 @@ async def get_model_config( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_config`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -270,7 +270,7 @@ async def get_model_config( async def get_model_repository_index( self, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_model_repository_index`""" metadata = self._get_metadata(headers) try: request = service_pb2.RepositoryIndexRequest() @@ -297,7 +297,7 @@ async def load_model( files=None, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.load_model`""" metadata = self._get_metadata(headers) try: request = service_pb2.RepositoryModelLoadRequest(model_name=model_name) @@ -328,7 +328,7 @@ async def unload_model( unload_dependents=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.unload_model`""" metadata = self._get_metadata(headers) try: request = 
service_pb2.RepositoryModelUnloadRequest(model_name=model_name) @@ -351,7 +351,7 @@ async def get_inference_statistics( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_inference_statistics`""" metadata = self._get_metadata(headers) try: if type(model_version) != str: @@ -382,7 +382,7 @@ async def update_trace_settings( as_json=False, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.update_trace_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.TraceSettingRequest() @@ -412,7 +412,7 @@ async def update_trace_settings( async def get_trace_settings( self, model_name=None, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_trace_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.TraceSettingRequest() @@ -432,7 +432,7 @@ async def get_trace_settings( async def update_log_settings( self, settings, headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.update_log_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.LogSettingsRequest() @@ -459,7 +459,7 @@ async def update_log_settings( raise_error_grpc(rpc_error) async def get_log_settings(self, headers=None, as_json=False, client_timeout=None): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_log_settings`""" metadata = self._get_metadata(headers) try: request = service_pb2.LogSettingsRequest() @@ -477,7 +477,7 @@ async def get_log_settings(self, headers=None, as_json=False, client_timeout=Non async def
get_system_shared_memory_status( self, region_name="", headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_system_shared_memory_status`""" metadata = self._get_metadata(headers) try: request = service_pb2.SystemSharedMemoryStatusRequest(name=region_name) @@ -499,7 +499,7 @@ async def get_system_shared_memory_status( async def register_system_shared_memory( self, name, key, byte_size, offset=0, headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.register_system_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.SystemSharedMemoryRegisterRequest( @@ -522,7 +522,7 @@ async def register_system_shared_memory( async def unregister_system_shared_memory( self, name="", headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.unregister_system_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.SystemSharedMemoryUnregisterRequest(name=name) @@ -548,7 +548,7 @@ async def unregister_system_shared_memory( async def get_cuda_shared_memory_status( self, region_name="", headers=None, as_json=False, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.get_cuda_shared_memory_status`""" metadata = self._get_metadata(headers) try: @@ -577,7 +577,7 @@ async def register_cuda_shared_memory( headers=None, client_timeout=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.register_cuda_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.CudaSharedMemoryRegisterRequest( @@ -603,7 +603,7 @@ async def 
register_cuda_shared_memory( async def unregister_cuda_shared_memory( self, name="", headers=None, client_timeout=None ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.unregister_cuda_shared_memory`""" metadata = self._get_metadata(headers) try: request = service_pb2.CudaSharedMemoryUnregisterRequest(name=name) @@ -641,7 +641,7 @@ async def infer( compression_algorithm=None, parameters=None, ): - """Refer to tritonclient.grpc.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.grpc.InferenceServerClient.infer`""" metadata = self._get_metadata(headers) @@ -692,8 +692,8 @@ def stream_infer( ---------- inputs_iterator : asynchronous iterator Async iterator that yields a dict(s) consists of the input - parameters to the async_stream_infer function defined in - tritonclient.grpc.InferenceServerClient. + parameters to the :py:meth:`tritonclient.grpc.InferenceServerClient.async_stream_infer` function defined in + :py:class:`tritonclient.grpc.InferenceServerClient`. stream_timeout : float Optional stream timeout. The stream will be closed once the specified timeout expires. @@ -708,17 +708,18 @@ def stream_infer( Returns ------- asynchronous iterator - Yield tuple holding (InferResult, InferenceServerException) objects. + Yield tuple holding (:py:class:`tritonclient.grpc.InferResult`, :py:class:`tritonclient.grpc.InferenceServerException`) objects. + Note + ---- This object can be used to cancel the inference request like below: - ---------- - it = stream_infer(...) - ret = it.cancel() - ---------- + + >>> it = stream_infer(...) + >>> ret = it.cancel() Raises ------ - InferenceServerException + :py:class:`tritonclient.grpc.InferenceServerException` If inputs_iterator does not yield the correct input. 
""" diff --git a/src/python/library/tritonclient/grpc/auth/__init__.py b/src/python/library/tritonclient/grpc/auth/__init__.py index ba0e25719..af220636c 100755 --- a/src/python/library/tritonclient/grpc/auth/__init__.py +++ b/src/python/library/tritonclient/grpc/auth/__init__.py @@ -27,3 +27,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. from ..._auth import BasicAuth + +__all__ = ["BasicAuth"] diff --git a/src/python/library/tritonclient/http/__init__.py b/src/python/library/tritonclient/http/__init__.py index 84fafabf7..b221d0604 100755 --- a/src/python/library/tritonclient/http/__init__.py +++ b/src/python/library/tritonclient/http/__init__.py @@ -30,6 +30,7 @@ from tritonclient.utils import * from .._plugin import InferenceServerClientPlugin + from .._request import Request from ._client import InferAsyncRequest, InferenceServerClient from ._infer_input import InferInput from ._infer_result import InferResult @@ -39,3 +40,14 @@ raise RuntimeError( "The installation does not include http support. Specify 'http' or 'all' while installing the tritonclient package to include the support" ) from error + +__all__ = [ + "InferenceServerClientPlugin", + "Request", + "InferenceServerClient", + "InferInput", + "InferRequestedOutput", + "InferResult", + "InferAsyncRequest", + "InferenceServerException", +] diff --git a/src/python/library/tritonclient/http/_client.py b/src/python/library/tritonclient/http/_client.py index a769d7f3b..5608b56b7 100755 --- a/src/python/library/tritonclient/http/_client.py +++ b/src/python/library/tritonclient/http/_client.py @@ -64,6 +64,7 @@ def __init__(self, greenlet, verbose=False): def get_result(self, block=True, timeout=None): """Get the results of the associated asynchronous inference. + Parameters ---------- block : bool @@ -86,6 +87,7 @@ def get_result(self, block=True, timeout=None): InferenceServerException If server fails to perform inference or failed to respond within specified timeout. 
+ """ try: @@ -152,9 +154,9 @@ class InferenceServerClient(InferenceServerClientBase): specified False. Raises - ------ - Exception - If unable to create a client. + ------ + Exception + If unable to create a client. """ @@ -209,7 +211,7 @@ def close(self): def _get(self, request_uri, headers, query_params): """Issues the GET request to the server - Parameters + Parameters ---------- request_uri: str The request URI to be used in GET request. @@ -223,6 +225,7 @@ def _get(self, request_uri, headers, query_params): ------- geventhttpclient.response.HTTPSocketPoolResponse The response from server. + """ request = Request(headers) self._call_plugin(request) @@ -860,6 +863,7 @@ def get_trace_settings(self, model_name=None, headers=None, query_params=None): def update_log_settings(self, settings, headers=None, query_params=None): """Update the global log settings of the Triton server. + Parameters ---------- settings: dict @@ -871,14 +875,17 @@ def update_log_settings(self, settings, headers=None, query_params=None): query_params: dict Optional url query parameters to use in network transaction + Returns ------- dict The JSON dict holding the updated log settings. + Raises ------ InferenceServerException If unable to update the log settings. + """ request_uri = "v2/logging" response = self._post( @@ -897,6 +904,7 @@ def update_log_settings(self, settings, headers=None, query_params=None): def get_log_settings(self, headers=None, query_params=None): """Get the global log settings for the Triton server + Parameters ---------- headers: dict @@ -905,14 +913,17 @@ def get_log_settings(self, headers=None, query_params=None): query_params: dict Optional url query parameters to use in network transaction + Returns ------- dict The JSON dict holding the log settings. + Raises ------ InferenceServerException If unable to get the log settings. 
+ """ request_uri = "v2/logging" @@ -1219,10 +1230,10 @@ def generate_request_body( Parameters ---------- inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id: str @@ -1290,7 +1301,7 @@ def generate_request_body( def parse_response_body( response_body, verbose=False, header_length=None, content_encoding=None ): - """Generate a InferResult object from the given 'response_body' + """Generate a :py:class:`InferResult` object from the given 'response_body' Parameters ---------- @@ -1340,14 +1351,14 @@ def infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. model_version: str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id: str @@ -1501,14 +1512,14 @@ def async_infer( model_name: str The name of the model to run inference. inputs : list - A list of InferInput objects, each describing data for a input + A list of :py:class:`InferInput` objects, each describing data for a input tensor required by the model. 
model_version: str The version of the model to run inference. The default value is an empty string which means then the server will choose a version based on the model and internal policy. outputs : list - A list of InferRequestedOutput objects, each describing how the output + A list of :py:class:`InferRequestedOutput` objects, each describing how the output data must be returned. If not specified all outputs produced by the model will be returned using default settings. request_id: str @@ -1564,7 +1575,7 @@ def async_infer( Returns ------- - InferAsyncRequest object + InferAsyncRequest The handle to the asynchronous inference request. Raises diff --git a/src/python/library/tritonclient/http/_infer_result.py b/src/python/library/tritonclient/http/_infer_result.py index bf9f82476..756f0c4fe 100755 --- a/src/python/library/tritonclient/http/_infer_result.py +++ b/src/python/library/tritonclient/http/_infer_result.py @@ -39,7 +39,7 @@ class InferResult: - """An object of InferResult class holds the response of + """An object of :py:class:`InferResult` class holds the response of an inference request and provide methods to retrieve inference results. @@ -109,7 +109,7 @@ def read(self, length=-1): def from_response_body( cls, response_body, verbose=False, header_length=None, content_encoding=None ): - """A class method to construct InferResult object + """A class method to construct :py:class:`InferResult` object from a given 'response_body'. 
Parameters diff --git a/src/python/library/tritonclient/http/_requested_output.py b/src/python/library/tritonclient/http/_requested_output.py index 5432de491..cbc30f649 100755 --- a/src/python/library/tritonclient/http/_requested_output.py +++ b/src/python/library/tritonclient/http/_requested_output.py @@ -29,7 +29,7 @@ class InferRequestedOutput: - """An object of InferRequestedOutput class is used to describe a + """An object of :py:class:`InferRequestedOutput` class is used to describe a requested output tensor for an inference request. Parameters @@ -93,7 +93,7 @@ def set_shared_memory(self, region_name, byte_size, offset=0): def unset_shared_memory(self): """Clears the shared memory option set by the last call to - InferRequestedOutput.set_shared_memory(). After call to this + :py:meth:`InferRequestedOutput.set_shared_memory()`. After call to this function requested output will no longer be returned in a shared memory region. """ diff --git a/src/python/library/tritonclient/http/_utils.py b/src/python/library/tritonclient/http/_utils.py index 2098a48bc..0770cc655 100755 --- a/src/python/library/tritonclient/http/_utils.py +++ b/src/python/library/tritonclient/http/_utils.py @@ -34,7 +34,7 @@ def _get_error(response): """ - Returns the InferenceServerException object if response + Returns the :py:class:`InferenceServerException` object if response indicates the error. 
If no error then return None """ if response.status_code != 200: @@ -61,7 +61,7 @@ def _get_error(response): def _raise_if_error(response): """ - Raise InferenceServerException if received non-Success + Raise :py:class:`InferenceServerException` if received non-Success response from the server """ error = _get_error(response) diff --git a/src/python/library/tritonclient/http/aio/__init__.py b/src/python/library/tritonclient/http/aio/__init__.py index 5101a310d..68cb99dce 100755 --- a/src/python/library/tritonclient/http/aio/__init__.py +++ b/src/python/library/tritonclient/http/aio/__init__.py @@ -50,7 +50,7 @@ async def _get_error(response): """ - Returns the InferenceServerException object if response + Returns the :py:class:`InferenceServerException` object if response indicates the error. If no error then return None """ if response.status != 200: @@ -78,7 +78,7 @@ async def _get_error(response): async def _raise_if_error(response): """ - Raise InferenceServerException if received non-Success + Raise :py:class:`InferenceServerException` if received non-Success response from the server """ error = await _get_error(response) @@ -89,7 +89,7 @@ async def _raise_if_error(response): class InferenceServerClient(InferenceServerClientBase): """This feature is currently in beta and may be subject to change. - An analogy of the tritonclient.http.InferenceServerClient to enable + An analogy of the :py:class:`tritonclient.http.InferenceServerClient` to enable calling via asyncio syntax. The object is intended to be used by a single thread and simultaneously calling methods with different threads is not supported and can cause undefined behavior. @@ -135,7 +135,7 @@ async def close(self): async def _get(self, request_uri, headers, query_params): """Issues the GET request to the server - Parameters + Parameters ---------- request_uri: str The request URI to be used in GET request. 
@@ -149,6 +149,7 @@ async def _get(self, request_uri, headers, query_params): ------- aiohttp.ClientResponse The response from server. + """ request = Request(headers) self._call_plugin(request) @@ -264,7 +265,7 @@ def _fix_header(self, headers): return fix_header async def is_server_live(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.is_server_live`""" request_uri = "v2/health/live" response = await self._get( request_uri=request_uri, headers=headers, query_params=query_params @@ -273,7 +274,7 @@ async def is_server_live(self, headers=None, query_params=None): return response.status == 200 async def is_server_ready(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.is_server_ready`""" request_uri = "v2/health/ready" response = await self._get( request_uri=request_uri, headers=headers, query_params=query_params @@ -284,7 +285,7 @@ async def is_server_ready(self, headers=None, query_params=None): async def is_model_ready( self, model_name, model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.is_model_ready`""" if type(model_version) != str: raise_error("model version must be a string") if model_version != "": @@ -301,7 +302,7 @@ async def is_model_ready( return response.status == 200 async def get_server_metadata(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_server_metadata`""" request_uri = "v2" response = await self._get( request_uri=request_uri, headers=headers, query_params=query_params @@ -317,7 +318,7 @@ async def get_server_metadata(self, headers=None, query_params=None): async def get_model_metadata( 
self, model_name, model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_model_metadata`""" if type(model_version) != str: raise_error("model version must be a string") if model_version != "": @@ -341,7 +342,7 @@ async def get_model_metadata( async def get_model_config( self, model_name, model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_model_config`""" if model_version != "": request_uri = "v2/models/{}/versions/{}/config".format( quote(model_name), model_version @@ -361,7 +362,7 @@ async def get_model_config( return json.loads(content) async def get_model_repository_index(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_model_repository_index`""" request_uri = "v2/repository/index" response = await self._post( request_uri=request_uri, @@ -380,7 +381,7 @@ async def get_model_repository_index(self, headers=None, query_params=None): async def load_model( self, model_name, headers=None, query_params=None, config=None, files=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.load_model`""" request_uri = "v2/repository/models/{}/load".format(quote(model_name)) load_request = {} if config is not None: @@ -405,7 +406,7 @@ async def load_model( async def unload_model( self, model_name, headers=None, query_params=None, unload_dependents=False ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.unload_model`""" request_uri = "v2/repository/models/{}/unload".format(quote(model_name)) unload_request = {"parameters": {"unload_dependents": unload_dependents}} response = 
await self._post( @@ -421,7 +422,7 @@ async def unload_model( async def get_inference_statistics( self, model_name="", model_version="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_inference_statistics`""" if model_name != "": if type(model_version) != str: raise_error("model version must be a string") @@ -448,7 +449,7 @@ async def get_inference_statistics( async def update_trace_settings( self, model_name=None, settings={}, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.update_trace_settings`""" if (model_name is not None) and (model_name != ""): request_uri = "v2/models/{}/trace/setting".format(quote(model_name)) else: @@ -471,7 +472,7 @@ async def update_trace_settings( async def get_trace_settings( self, model_name=None, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_trace_settings`""" if (model_name is not None) and (model_name != ""): request_uri = "v2/models/{}/trace/setting".format(quote(model_name)) else: @@ -489,7 +490,7 @@ async def get_trace_settings( return json.loads(content) async def update_log_settings(self, settings, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.update_log_settings`""" request_uri = "v2/logging" response = await self._post( @@ -507,7 +508,7 @@ async def update_log_settings(self, settings, headers=None, query_params=None): return json.loads(content) async def get_log_settings(self, headers=None, query_params=None): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_log_settings`""" request_uri = "v2/logging" response = 
await self._get( @@ -524,7 +525,7 @@ async def get_log_settings(self, headers=None, query_params=None): async def get_system_shared_memory_status( self, region_name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_system_shared_memory_status`""" if region_name != "": request_uri = "v2/systemsharedmemory/region/{}/status".format( quote(region_name) @@ -546,7 +547,7 @@ async def get_system_shared_memory_status( async def register_system_shared_memory( self, name, key, byte_size, offset=0, headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.register_system_shared_memory`""" request_uri = "v2/systemsharedmemory/region/{}/register".format(quote(name)) register_request = {"key": key, "offset": offset, "byte_size": byte_size} @@ -565,7 +566,7 @@ async def register_system_shared_memory( async def unregister_system_shared_memory( self, name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.unregister_system_shared_memory`""" if name != "": request_uri = "v2/systemsharedmemory/region/{}/unregister".format( quote(name) @@ -589,7 +590,7 @@ async def unregister_system_shared_memory( async def get_cuda_shared_memory_status( self, region_name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.get_cuda_shared_memory_status`""" if region_name != "": request_uri = "v2/cudasharedmemory/region/{}/status".format( quote(region_name) @@ -611,7 +612,7 @@ async def get_cuda_shared_memory_status( async def register_cuda_shared_memory( self, name, raw_handle, device_id, byte_size, headers=None, query_params=None ): - """Refer to 
tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.register_cuda_shared_memory`""" request_uri = "v2/cudasharedmemory/region/{}/register".format(quote(name)) register_request = { @@ -634,7 +635,7 @@ async def register_cuda_shared_memory( async def unregister_cuda_shared_memory( self, name="", headers=None, query_params=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.unregister_cuda_shared_memory`""" if name != "": request_uri = "v2/cudasharedmemory/region/{}/unregister".format(quote(name)) else: @@ -665,7 +666,7 @@ def generate_request_body( timeout=None, parameters=None, ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.generate_request_body`""" return _get_inference_request( inputs=inputs, request_id=request_id, @@ -682,7 +683,7 @@ def generate_request_body( def parse_response_body( response_body, verbose=False, header_length=None, content_encoding=None ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.parse_response_body`""" return InferResult.from_response_body( response_body, verbose, header_length, content_encoding ) @@ -705,7 +706,7 @@ async def infer( response_compression_algorithm=None, parameters=None, ): - """Refer to tritonclient.http.InferenceServerClient""" + """Refer to :py:meth:`tritonclient.http.InferenceServerClient.infer`""" request_body, json_size = _get_inference_request( inputs=inputs, request_id=request_id, diff --git a/src/python/library/tritonclient/http/auth/__init__.py b/src/python/library/tritonclient/http/auth/__init__.py index ba0e25719..af220636c 100755 --- a/src/python/library/tritonclient/http/auth/__init__.py +++ b/src/python/library/tritonclient/http/auth/__init__.py @@ -27,3 +27,5 @@ # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH 
DAMAGE. from ..._auth import BasicAuth + +__all__ = ["BasicAuth"] diff --git a/src/python/library/tritonclient/utils/__init__.py b/src/python/library/tritonclient/utils/__init__.py index e761217ed..7f3079c66 100755 --- a/src/python/library/tritonclient/utils/__init__.py +++ b/src/python/library/tritonclient/utils/__init__.py @@ -193,7 +193,7 @@ def triton_to_np_dtype(dtype): def serialize_byte_tensor(input_tensor): """ Serializes a bytes tensor into a flat numpy array of length prepended - bytes. The numpy array should use dtype of np.object_. For np.bytes_, + bytes. The numpy array should use dtype of ``np.object_``. For ``np.bytes_``, numpy will remove trailing zeros at the end of byte sequence and because of this it should be avoided.