From 3de5922c48ece05b5d55862063e67ab3a22c97f5 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 5 Oct 2023 14:30:30 -0700 Subject: [PATCH] Rollback non-decoupled any response on cancel --- README.md | 11 ++++------- src/infer_request.cc | 6 ------ src/infer_request.h | 1 - src/pb_cancel.cc | 6 ------ src/pb_cancel.h | 2 -- src/pb_stub.cc | 13 +++---------- 6 files changed, 7 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 86abb10d..6a45a619 100644 --- a/README.md +++ b/README.md @@ -508,10 +508,8 @@ Supported error codes: #### Request Cancellation Handling One or more requests may be cancelled by the client during execution. Starting -from 23.10, `request.is_cancelled()` returns whether the request is cancelled. - -If a request is cancelled, the model may respond with any dummy object in place -of the normal output tensors on the request. For example: +from 23.10, `request.is_cancelled()` returns whether the request is cancelled or +not. For example: ```python import triton_python_backend_utils as pb_utils @@ -524,7 +522,8 @@ class TritonPythonModel: for request in requests: if request.is_cancelled(): - responses.append(None) + responses.append(pb_utils.InferenceResponse( + error=pb_utils.TritonError("Message", pb_utils.TritonError.CANCELLED))) else: ... @@ -600,8 +599,6 @@ full power of what can be achieved from decoupled API. Read [Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md) for more details on how to host a decoupled model. -##### - ##### Known Issues * Currently, decoupled Python models can not make async infer requests. diff --git a/src/infer_request.cc b/src/infer_request.cc index e148b062..e9d243f1 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -410,12 +410,6 @@ InferRequest::IsCancelled() return pb_cancel_->IsCancelled(); } -bool -InferRequest::IsCancelledLastResponse() -{ - return pb_cancel_->IsCancelledInternalFlag(); -} - std::shared_ptr InferRequest::GetResponseSender() { diff --git a/src/infer_request.h b/src/infer_request.h index 926e50b9..bc6a2acf 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -109,7 +109,6 @@ class InferRequest { std::shared_ptr Exec(const bool is_decoupled); std::shared_ptr GetResponseSender(); bool IsCancelled(); - bool IsCancelledLastResponse(); #endif /// Save an Inference Request to shared memory. diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc index 4fdcda81..4c9b926b 100644 --- a/src/pb_cancel.cc +++ b/src/pb_cancel.cc @@ -54,12 +54,6 @@ PbCancel::ShmPayload() return cancel_shm_.data_.get(); } -bool -PbCancel::IsCancelledInternalFlag() -{ - return is_cancelled_; -} - bool PbCancel::IsCancelled() { diff --git a/src/pb_cancel.h b/src/pb_cancel.h index 4eb4a8ff..3ebf07b5 100644 --- a/src/pb_cancel.h +++ b/src/pb_cancel.h @@ -46,8 +46,6 @@ class PbCancel { bi::managed_external_buffer::handle_t ShmHandle(); IsCancelledMessage* ShmPayload(); - bool IsCancelledInternalFlag(); - bool IsCancelled(); void ReportIsCancelled(bool is_cancelled); diff --git a/src/pb_stub.cc b/src/pb_stub.cc index c379998d..87abe583 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -771,17 +771,10 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr) std::to_string(response_size) + "\n"; throw PythonBackendException(err); } - for (size_t i = 0; i < response_size; i++) { - // If the model has checked for cancellation and the request is cancelled, - // replace returned type with a cancelled response. - if (py_request_list[i].cast()->IsCancelledLastResponse()) { - responses[i] = std::make_shared( - std::vector>{}, - std::make_shared("", TRITONSERVER_ERROR_CANCELLED)); - } + for (auto& response : responses) { // Check the return type of execute function. - else if (!py::isinstance(responses[i])) { - std::string str = py::str(responses[i].get_type()); + if (!py::isinstance(response)) { + std::string str = py::str(response.get_type()); throw PythonBackendException( std::string("Expected an 'InferenceResponse' object in the execute " "function return list, found type '") +