From 4b6bb09fcb0643241098d3a5475b02c25fbe0f14 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 26 Sep 2023 15:53:16 -0700 Subject: [PATCH 01/12] Add cancelled response status --- README.md | 1 + src/pb_stub.cc | 4 ++++ 2 files changed, 5 insertions(+) diff --git a/README.md b/README.md index 517a9b64..6ad95407 100644 --- a/README.md +++ b/README.md @@ -502,6 +502,7 @@ Supported error codes: * `pb_utils.TritonError.UNAVAILABLE` * `pb_utils.TritonError.UNSUPPORTED` * `pb_utils.TritonError.ALREADY_EXISTS` +* `pb_utils.TritonError.CANCELLED` (since 23.10) #### Decoupled mode diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 37c9a5b5..cd9fe799 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1364,6 +1364,7 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) .value( "ALREADY_EXISTS", TRITONSERVER_Error_Code::TRITONSERVER_ERROR_ALREADY_EXISTS) + .value("CANCELLED", TRITONSERVER_Error_Code::TRITONSERVER_ERROR_CANCELLED) .export_values(); triton_error.def_property_readonly_static( "UNKNOWN", @@ -1386,6 +1387,9 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) triton_error.def_property_readonly_static( "ALREADY_EXISTS", [](py::object /* self */) { return TRITONSERVER_ERROR_ALREADY_EXISTS; }); + triton_error.def_property_readonly_static( + "CANCELLED", + [](py::object /* self */) { return TRITONSERVER_ERROR_CANCELLED; }); triton_error.def( py::init(), py::arg("message").none(false), From af0f29b0ea6b0a08f4900fe27a50b01b03b388bd Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 27 Sep 2023 18:58:25 -0700 Subject: [PATCH 02/12] Add request cancellation --- CMakeLists.txt | 2 ++ README.md | 32 +++++++++++++++++++ src/infer_request.cc | 17 +++++++++++ src/infer_request.h | 1 + src/ipc_message.h | 3 +- src/pb_cancel.cc | 73 ++++++++++++++++++++++++++++++++++++++++++++ src/pb_cancel.h | 62 +++++++++++++++++++++++++++++++++++++ src/pb_stub.cc | 45 ++++++++++++++++++++++++++- 
src/pb_stub.h | 7 +++++ src/pb_utils.h | 5 +++ src/python_be.cc | 28 +++++++++++++++++ src/python_be.h | 3 ++ 12 files changed, 276 insertions(+), 2 deletions(-) create mode 100644 src/pb_cancel.cc create mode 100644 src/pb_cancel.h diff --git a/CMakeLists.txt b/CMakeLists.txt index 93a7ae60..057797dd 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,6 +150,8 @@ set( src/pb_error.h src/pb_log.cc src/pb_log.h + src/pb_cancel.cc + src/pb_cancel.h src/pb_memory.cc src/pb_memory.h src/pb_tensor.cc diff --git a/README.md b/README.md index 6ad95407..aa4e5cc2 100644 --- a/README.md +++ b/README.md @@ -46,6 +46,7 @@ any C++ code. - [`execute`](#execute) - [Default Mode](#default-mode) - [Error Handling](#error-handling) + - [Request Cancellation](#request-cancellation) - [Decoupled mode](#decoupled-mode) - [Use Cases](#use-cases) - [Known Issues](#known-issues) @@ -504,6 +505,37 @@ Supported error codes: * `pb_utils.TritonError.ALREADY_EXISTS` * `pb_utils.TritonError.CANCELLED` (since 23.10) +#### Request Cancellation + +One or more requests may be cancelled during execution, for example, cancelled +by the user. Starting from 23.10, `request.is_cancelled()` returns up-to-date +`True` or `False` on whether the request is cancelled. If a request is +cancelled, the model should respond `pb_utils.TritonError.CANCELLED` in place of +the normal output tensors on the request. For example: + +```python +import triton_python_backend_utils as pb_utils + +class TritonPythonModel: + ... + + def execute(self, requests): + responses = [] + + for request in requests: + if request.is_cancelled(): + responses.append(pb_utils.InferenceResponse( + error=pb_utils.TritonError("Message", pb_utils.TritonError.CANCELLED))) + else: + ... + + return responses +``` + +Although checking for request cancellation is optional, it is recommended to +check for cancellation at strategic request execution stages that can early +terminate the execution in the event of its response is no longer needed. 
+ #### Decoupled mode This mode allows user to send multiple responses for a request or diff --git a/src/infer_request.cc b/src/infer_request.cc index 5fdae669..270c7bb3 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -400,6 +400,23 @@ InferRequest::DeleteResponseFactory() #endif #ifdef TRITON_PB_STUB +bool +InferRequest::IsCancelled() +{ + std::unique_ptr& stub = Stub::GetOrCreateInstance(); + if (!stub->StubToParentServiceActive()) { + LOG_ERROR << "Cannot communicate with parent service"; + return false; + } + if (request_address_ == 0) { + LOG_ERROR << "Request address not provided (default initialized?)"; + return false; + } + std::unique_ptr pb_cancel(new PbCancel(request_address_)); + stub->EnqueueIsCancelled(pb_cancel); + return pb_cancel->IsCancelled(); +} + std::shared_ptr InferRequest::GetResponseSender() { diff --git a/src/infer_request.h b/src/infer_request.h index 6652b2fb..a96545a3 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -107,6 +107,7 @@ class InferRequest { #ifdef TRITON_PB_STUB std::shared_ptr Exec(const bool is_decoupled); std::shared_ptr GetResponseSender(); + bool IsCancelled(); #endif /// Save an Inference Request to shared memory. diff --git a/src/ipc_message.h b/src/ipc_message.h index 7040f2b4..14d3dc5f 100644 --- a/src/ipc_message.h +++ b/src/ipc_message.h @@ -62,7 +62,8 @@ typedef enum PYTHONSTUB_commandtype_enum { PYTHONSTUB_MetricRequestSet, PYTHONSTUB_LoadModelRequest, PYTHONSTUB_UnloadModelRequest, - PYTHONSTUB_ModelReadinessRequest + PYTHONSTUB_ModelReadinessRequest, + PYTHONSTUB_IsRequestCancelled } PYTHONSTUB_CommandType; /// diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc new file mode 100644 index 00000000..9add280b --- /dev/null +++ b/src/pb_cancel.cc @@ -0,0 +1,73 @@ +// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "pb_cancel.h" + +namespace triton { namespace backend { namespace python { + +void +PbCancel::SaveToSharedMemory(std::unique_ptr& shm_pool) +{ + cancel_shm_ = shm_pool->Construct(); + new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex; + new (&(cancel_shm_.data_->cv)) bi::interprocess_condition; + cancel_shm_.data_->waiting_on_stub = false; + cancel_shm_.data_->request_address = request_address_; + cancel_shm_.data_->is_cancelled = is_cancelled_; +} + +bi::managed_external_buffer::handle_t +PbCancel::ShmHandle() +{ + return cancel_shm_.handle_; +} + +IsCancelledMessage* +PbCancel::ShmPayload() +{ + return cancel_shm_.data_.get(); +} + +bool +PbCancel::IsCancelled() +{ + std::unique_lock lk(mu_); + cv_.wait(lk, [this] { return updated_; }); + return is_cancelled_; +} + +void +PbCancel::ReportIsCancelled(bool is_cancelled) +{ + { + std::lock_guard lk(mu_); + is_cancelled_ = is_cancelled; + updated_ = true; + } + cv_.notify_all(); +} + +}}} // namespace triton::backend::python diff --git a/src/pb_cancel.h b/src/pb_cancel.h new file mode 100644 index 00000000..904e98c0 --- /dev/null +++ b/src/pb_cancel.h @@ -0,0 +1,62 @@ +// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. 
+// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#pragma once + +#include +#include + +#include "pb_utils.h" + +namespace triton { namespace backend { namespace python { + +class PbCancel { + public: + PbCancel(intptr_t request_address) + : updated_(false), request_address_(request_address), is_cancelled_(false) + { + } + DISALLOW_COPY_AND_ASSIGN(PbCancel); + + void SaveToSharedMemory(std::unique_ptr& shm_pool); + bi::managed_external_buffer::handle_t ShmHandle(); + IsCancelledMessage* ShmPayload(); + + bool IsCancelled(); + void ReportIsCancelled(bool is_cancelled); + + private: + AllocatedSharedMemory cancel_shm_; + + std::mutex mu_; + std::condition_variable cv_; + bool updated_; + + intptr_t request_address_; + bool is_cancelled_; +}; + +}}}; // namespace triton::backend::python diff --git a/src/pb_stub.cc b/src/pb_stub.cc index cd9fe799..5a5094e0 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -945,6 +945,9 @@ Stub::ServiceStubToParentRequests() SendLogMessage(utils_msg_payload); } else if (utils_msg_payload->command_type == PYTHONSTUB_CleanupRequest) { SendCleanupId(utils_msg_payload); + } else if ( + utils_msg_payload->command_type == PYTHONSTUB_IsRequestCancelled) { + SendIsCancelled(utils_msg_payload); } else { std::cerr << 
"Error when sending message via stub_to_parent message " "buffer - unknown command\n"; @@ -1028,6 +1031,45 @@ Stub::EnqueueCleanupId(void* id) } } +void +Stub::EnqueueIsCancelled(const std::unique_ptr& pb_cancel) +{ + std::unique_ptr utils_msg_payload = + std::make_unique( + PYTHONSTUB_IsRequestCancelled, + reinterpret_cast(pb_cancel.get())); + EnqueueUtilsMessage(std::move(utils_msg_payload)); +} + +void +Stub::SendIsCancelled(std::unique_ptr& utils_msg_payload) +{ + PbCancel* pb_cancel = + reinterpret_cast(utils_msg_payload->utils_message_ptr); + pb_cancel->SaveToSharedMemory(shm_pool_); + + IsCancelledMessage* message_payload = pb_cancel->ShmPayload(); + std::unique_ptr ipc_message = + IPCMessage::Create(shm_pool_, false /* inline_response */); + ipc_message->Command() = utils_msg_payload->command_type; + ipc_message->Args() = pb_cancel->ShmHandle(); + + bool is_cancelled = false; + { + bi::scoped_lock lk(message_payload->mu); + + SendIPCUtilsMessage(ipc_message); + while (!message_payload->waiting_on_stub) { + message_payload->cv.wait(lk); + } + + is_cancelled = message_payload->is_cancelled; + message_payload->waiting_on_stub = false; + message_payload->cv.notify_all(); + } + pb_cancel->ReportIsCancelled(is_cancelled); +} + bool Stub::StubToParentServiceActive() { @@ -1505,7 +1547,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) .def( "requested_output_names", &InferRequest::RequestedOutputNames, py::return_value_policy::reference_internal) - .def("get_response_sender", &InferRequest::GetResponseSender); + .def("get_response_sender", &InferRequest::GetResponseSender) + .def("is_cancelled", &InferRequest::IsCancelled); py::class_>(module, "Tensor") .def(py::init(&PbTensor::FromNumpy)) diff --git a/src/pb_stub.h b/src/pb_stub.h index 6d047d29..530dca2c 100644 --- a/src/pb_stub.h +++ b/src/pb_stub.h @@ -49,6 +49,7 @@ #include "message_queue.h" #include "metric.h" #include "metric_family.h" +#include "pb_cancel.h" #include "pb_log.h" #include 
"pb_response_iterator.h" #include "pb_utils.h" @@ -308,6 +309,12 @@ class Stub { /// Add cleanup id to queue void EnqueueCleanupId(void* id); + /// Add request cancellation query to queue + void EnqueueIsCancelled(const std::unique_ptr& pb_cancel); + + /// Send request cancellation query to python backend + void SendIsCancelled(std::unique_ptr& utils_msg_payload); + /// Is the stub initialized bool IsInitialized(); diff --git a/src/pb_utils.h b/src/pb_utils.h index 1d651f3f..71a70272 100644 --- a/src/pb_utils.h +++ b/src/pb_utils.h @@ -182,6 +182,11 @@ struct CleanupMessage : SendMessageBase { void* id; }; +struct IsCancelledMessage : SendMessageBase { + intptr_t request_address; + bool is_cancelled; +}; + struct CustomMetricsMessage : SendMessageBase { bi::managed_external_buffer::handle_t message; bool has_error; diff --git a/src/python_be.cc b/src/python_be.cc index b196cfab..b9835221 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -817,6 +817,10 @@ ModelInstanceState::StubToParentMQMonitor() ProcessBLSCleanupRequest(message); break; } + case PYTHONSTUB_IsRequestCancelled: { + ProcessIsRequestCancelled(message); + break; + } case PYTHONSTUB_MetricFamilyRequestNew: case PYTHONSTUB_MetricFamilyRequestDelete: { ProcessMetricFamilyRequest(message); @@ -918,6 +922,30 @@ ModelInstanceState::ProcessBLSCleanupRequest( } } +void +ModelInstanceState::ProcessIsRequestCancelled( + const std::unique_ptr& message) +{ + AllocatedSharedMemory message_shm = + Stub()->ShmPool()->Load(message->Args()); + IsCancelledMessage* message_payload = + reinterpret_cast(message_shm.data_.get()); + + { + bi::scoped_lock lk{message_payload->mu}; + + TRITONBACKEND_Request* request = reinterpret_cast( + message_payload->request_address); + TRITONBACKEND_RequestIsCancelled(request, &message_payload->is_cancelled); + + message_payload->waiting_on_stub = true; + message_payload->cv.notify_all(); + while (message_payload->waiting_on_stub) { + message_payload->cv.wait(lk); + } + } +} + 
template void ModelInstanceState::ProcessMessage( diff --git a/src/python_be.h b/src/python_be.h index 825c45de..4c8d702f 100644 --- a/src/python_be.h +++ b/src/python_be.h @@ -394,6 +394,9 @@ class ModelInstanceState : public BackendModelInstance { // Process the bls decoupled cleanup request void ProcessBLSCleanupRequest(const std::unique_ptr& message); + // Process request cancellation query + void ProcessIsRequestCancelled(const std::unique_ptr& message); + // Process a message. The function 'request_handler' is invoked // to handle the request. T should be either 'MetricFamily', 'Metric' or // 'ModelLoader', and MessageType should be either 'MetricFamilyMessage', From 3d87786a0ef7c16c963eeb6724b7cc74a04c3416 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 29 Sep 2023 11:17:30 -0700 Subject: [PATCH 03/12] Check cancellation on response factory if available --- src/infer_request.cc | 7 ++----- src/pb_cancel.cc | 1 + src/pb_cancel.h | 6 ++++-- src/pb_utils.h | 1 + src/python_be.cc | 18 +++++++++++++++--- 5 files changed, 23 insertions(+), 10 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 270c7bb3..1410fb46 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -408,11 +408,8 @@ InferRequest::IsCancelled() LOG_ERROR << "Cannot communicate with parent service"; return false; } - if (request_address_ == 0) { - LOG_ERROR << "Request address not provided (default initialized?)"; - return false; - } - std::unique_ptr pb_cancel(new PbCancel(request_address_)); + std::unique_ptr pb_cancel( + new PbCancel(response_factory_address_, request_address_)); stub->EnqueueIsCancelled(pb_cancel); return pb_cancel->IsCancelled(); } diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc index 9add280b..272babf0 100644 --- a/src/pb_cancel.cc +++ b/src/pb_cancel.cc @@ -35,6 +35,7 @@ PbCancel::SaveToSharedMemory(std::unique_ptr& shm_pool) new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex; new 
(&(cancel_shm_.data_->cv)) bi::interprocess_condition; cancel_shm_.data_->waiting_on_stub = false; + cancel_shm_.data_->response_factory_address = response_factory_address_; cancel_shm_.data_->request_address = request_address_; cancel_shm_.data_->is_cancelled = is_cancelled_; } diff --git a/src/pb_cancel.h b/src/pb_cancel.h index 904e98c0..ec4954b3 100644 --- a/src/pb_cancel.h +++ b/src/pb_cancel.h @@ -35,8 +35,9 @@ namespace triton { namespace backend { namespace python { class PbCancel { public: - PbCancel(intptr_t request_address) - : updated_(false), request_address_(request_address), is_cancelled_(false) + PbCancel(intptr_t response_factory_address, intptr_t request_address) + : updated_(false), response_factory_address_(response_factory_address), + request_address_(request_address), is_cancelled_(false) { } DISALLOW_COPY_AND_ASSIGN(PbCancel); @@ -55,6 +56,7 @@ class PbCancel { std::condition_variable cv_; bool updated_; + intptr_t response_factory_address_; intptr_t request_address_; bool is_cancelled_; }; diff --git a/src/pb_utils.h b/src/pb_utils.h index 71a70272..612c46a4 100644 --- a/src/pb_utils.h +++ b/src/pb_utils.h @@ -183,6 +183,7 @@ struct CleanupMessage : SendMessageBase { }; struct IsCancelledMessage : SendMessageBase { + intptr_t response_factory_address; intptr_t request_address; bool is_cancelled; }; diff --git a/src/python_be.cc b/src/python_be.cc index b9835221..f70a01a3 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -934,9 +934,21 @@ ModelInstanceState::ProcessIsRequestCancelled( { bi::scoped_lock lk{message_payload->mu}; - TRITONBACKEND_Request* request = reinterpret_cast( - message_payload->request_address); - TRITONBACKEND_RequestIsCancelled(request, &message_payload->is_cancelled); + if (message_payload->response_factory_address != 0) { + TRITONBACKEND_ResponseFactory* response_factory = + reinterpret_cast( + message_payload->response_factory_address); + TRITONBACKEND_ResponseFactoryIsCancelled( + response_factory, 
&message_payload->is_cancelled); + } else if (message_payload->request_address != 0) { + TRITONBACKEND_Request* request = reinterpret_cast( + message_payload->request_address); + TRITONBACKEND_RequestIsCancelled(request, &message_payload->is_cancelled); + } else { + LOG_MESSAGE( + TRITONSERVER_LOG_ERROR, "Cannot determine request cancellation"); + message_payload->is_cancelled = false; + } message_payload->waiting_on_stub = true; message_payload->cv.notify_all(); From 2319355a742f9a9f7c5d6d37b9a485330ace7a9b Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 29 Sep 2023 12:08:15 -0700 Subject: [PATCH 04/12] Remove unnecessary wrapping --- src/infer_request.cc | 7 +++---- src/pb_stub.cc | 5 ++--- src/pb_stub.h | 2 +- 3 files changed, 6 insertions(+), 8 deletions(-) diff --git a/src/infer_request.cc b/src/infer_request.cc index 1410fb46..8e753bb1 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -408,10 +408,9 @@ InferRequest::IsCancelled() LOG_ERROR << "Cannot communicate with parent service"; return false; } - std::unique_ptr pb_cancel( - new PbCancel(response_factory_address_, request_address_)); - stub->EnqueueIsCancelled(pb_cancel); - return pb_cancel->IsCancelled(); + PbCancel pb_cancel(response_factory_address_, request_address_); + stub->EnqueueIsCancelled(&pb_cancel); + return pb_cancel.IsCancelled(); } std::shared_ptr diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 5a5094e0..2eb8b08c 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1032,12 +1032,11 @@ Stub::EnqueueCleanupId(void* id) } void -Stub::EnqueueIsCancelled(const std::unique_ptr& pb_cancel) +Stub::EnqueueIsCancelled(PbCancel* pb_cancel) { std::unique_ptr utils_msg_payload = std::make_unique( - PYTHONSTUB_IsRequestCancelled, - reinterpret_cast(pb_cancel.get())); + PYTHONSTUB_IsRequestCancelled, reinterpret_cast(pb_cancel)); EnqueueUtilsMessage(std::move(utils_msg_payload)); } diff --git a/src/pb_stub.h b/src/pb_stub.h index 
530dca2c..d52196e1 100644 --- a/src/pb_stub.h +++ b/src/pb_stub.h @@ -310,7 +310,7 @@ class Stub { void EnqueueCleanupId(void* id); /// Add request cancellation query to queue - void EnqueueIsCancelled(const std::unique_ptr& pb_cancel); + void EnqueueIsCancelled(PbCancel* pb_cancel); /// Send request cancellation query to python backend void SendIsCancelled(std::unique_ptr& utils_msg_payload); From 9262f7c11d6d5cabd55726fa34509dd8d599acae Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Fri, 29 Sep 2023 18:01:16 -0700 Subject: [PATCH 05/12] Throw error instead of log error --- src/python_be.cc | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/src/python_be.cc b/src/python_be.cc index f70a01a3..7f46d473 100644 --- a/src/python_be.cc +++ b/src/python_be.cc @@ -945,9 +945,7 @@ ModelInstanceState::ProcessIsRequestCancelled( message_payload->request_address); TRITONBACKEND_RequestIsCancelled(request, &message_payload->is_cancelled); } else { - LOG_MESSAGE( - TRITONSERVER_LOG_ERROR, "Cannot determine request cancellation"); - message_payload->is_cancelled = false; + throw PythonBackendException("Cannot determine request cancellation"); } message_payload->waiting_on_stub = true; From 913ab0a30c922d0bbd2d39e705463d427a8d9fae Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Mon, 2 Oct 2023 14:31:12 -0700 Subject: [PATCH 06/12] Add is cancelled check at response sender --- src/pb_stub.cc | 3 ++- src/response_sender.cc | 14 ++++++++++++++ src/response_sender.h | 1 + 3 files changed, 17 insertions(+), 1 deletion(-) diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 2eb8b08c..87abe583 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -1585,7 +1585,8 @@ PYBIND11_EMBEDDED_MODULE(c_python_backend_utils, module) module, "InferenceResponseSender") .def( "send", &ResponseSender::Send, py::arg("response") = nullptr, - py::arg("flags") = 0); + py::arg("flags") = 0) + .def("is_cancelled", 
&ResponseSender::IsCancelled); py::class_>( module, "ResponseIterator") diff --git a/src/response_sender.cc b/src/response_sender.cc index a74459f6..9390aa15 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -184,4 +184,18 @@ ResponseSender::Send( } } } + +bool +ResponseSender::IsCancelled() +{ + std::unique_ptr& stub = Stub::GetOrCreateInstance(); + if (!stub->StubToParentServiceActive()) { + LOG_ERROR << "Cannot communicate with parent service"; + return false; + } + PbCancel pb_cancel(response_factory_address_, request_address_); + stub->EnqueueIsCancelled(&pb_cancel); + return pb_cancel.IsCancelled(); +} + }}} // namespace triton::backend::python diff --git a/src/response_sender.h b/src/response_sender.h index 114f22c0..9b3509f1 100644 --- a/src/response_sender.h +++ b/src/response_sender.h @@ -37,6 +37,7 @@ class ResponseSender { intptr_t request_address, intptr_t response_factory_address, std::unique_ptr& shm_pool); void Send(std::shared_ptr response, const uint32_t flags); + bool IsCancelled(); private: intptr_t request_address_; From aee842e150b7555efef28b9ff89b3663f1130a4f Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 3 Oct 2023 16:03:38 -0700 Subject: [PATCH 07/12] Enable more reuse on request cancellation and improve model interface --- CMakeLists.txt | 4 ++-- README.md | 25 ++++++++++++++++--------- src/infer_request.cc | 23 +++++++++++++---------- src/infer_request.h | 3 +++ src/pb_cancel.cc | 26 ++++++++++++++++++++++++-- src/pb_cancel.h | 6 ++++-- src/pb_stub.cc | 13 ++++++++++--- src/response_sender.cc | 16 +++++++--------- src/response_sender.h | 5 ++++- 9 files changed, 83 insertions(+), 38 deletions(-) diff --git a/CMakeLists.txt b/CMakeLists.txt index 057797dd..3f20bbc3 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -150,8 +150,6 @@ set( src/pb_error.h src/pb_log.cc src/pb_log.h - src/pb_cancel.cc - src/pb_cancel.h src/pb_memory.cc src/pb_memory.h src/pb_tensor.cc @@ -210,6 
+208,8 @@ set( src/pb_stub.cc src/pb_response_iterator.h src/pb_response_iterator.cc + src/pb_cancel.cc + src/pb_cancel.h ) list(APPEND diff --git a/README.md b/README.md index aa4e5cc2..da7dcb2e 100644 --- a/README.md +++ b/README.md @@ -46,7 +46,7 @@ any C++ code. - [`execute`](#execute) - [Default Mode](#default-mode) - [Error Handling](#error-handling) - - [Request Cancellation](#request-cancellation) + - [Request Cancellation Handling](#request-cancellation-handling) - [Decoupled mode](#decoupled-mode) - [Use Cases](#use-cases) - [Known Issues](#known-issues) @@ -505,13 +505,13 @@ Supported error codes: * `pb_utils.TritonError.ALREADY_EXISTS` * `pb_utils.TritonError.CANCELLED` (since 23.10) -#### Request Cancellation +#### Request Cancellation Handling -One or more requests may be cancelled during execution, for example, cancelled -by the user. Starting from 23.10, `request.is_cancelled()` returns up-to-date -`True` or `False` on whether the request is cancelled. If a request is -cancelled, the model should respond `pb_utils.TritonError.CANCELLED` in place of -the normal output tensors on the request. For example: +One or more requests may be cancelled by the client during execution. Starting +from 23.10, `request.is_cancelled()` returns up-to-date `True` or `False` on +whether the request is cancelled. If a request is cancelled, the model may +respond with any dummy object in place of the normal output tensors on the +request. For example: ```python import triton_python_backend_utils as pb_utils @@ -524,8 +524,7 @@ class TritonPythonModel: for request in requests: if request.is_cancelled(): - responses.append(pb_utils.InferenceResponse( - error=pb_utils.TritonError("Message", pb_utils.TritonError.CANCELLED))) + responses.append(None) else: ... @@ -576,6 +575,12 @@ request. After setting errors for an pb_utils.InferenceResponse object, use InferenceResponseSender.send() to send response with the error back to the user. 
+Starting from 23.10, request cancellation can be checked directly on the +`InferenceResponseSender` object, for example `response_sender.is_cancelled()`, +even after the request has gone out-of-scope. If +`response_sender.is_cancelled()` returned `True`, the +TRITONSERVER_RESPONSE_COMPLETE_FINAL flag is sent automatically. + ##### Use Cases The decoupled mode is powerful and supports various other use cases: @@ -598,6 +603,8 @@ full power of what can be achieved from decoupled API. Read [Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md) for more details on how to host a decoupled model. +##### + ##### Known Issues * Currently, decoupled Python models can not make async infer requests. diff --git a/src/infer_request.cc b/src/infer_request.cc index 8e753bb1..e148b062 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -71,9 +71,11 @@ InferRequest::InferRequest( inputs_ = inputs; requested_output_names_ = requested_output_names; #ifdef TRITON_PB_STUB + pb_cancel_ = + std::make_shared(response_factory_address_, request_address_); response_sender_ = std::make_shared( request_address_, response_factory_address_, - Stub::GetOrCreateInstance()->SharedMemory()); + Stub::GetOrCreateInstance()->SharedMemory(), pb_cancel_); #endif } @@ -379,9 +381,11 @@ InferRequest::InferRequest( trace_ = infer_request_shm_ptr_->trace; #ifdef TRITON_PB_STUB + pb_cancel_ = + std::make_shared(response_factory_address_, request_address_); response_sender_ = std::make_shared( request_address_, response_factory_address_, - Stub::GetOrCreateInstance()->SharedMemory()); + Stub::GetOrCreateInstance()->SharedMemory(), pb_cancel_); #endif } @@ -403,14 +407,13 @@ InferRequest::DeleteResponseFactory() bool InferRequest::IsCancelled() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); - if (!stub->StubToParentServiceActive()) { - LOG_ERROR << "Cannot communicate with parent service"; - return false; - } - 
PbCancel pb_cancel(response_factory_address_, request_address_); - stub->EnqueueIsCancelled(&pb_cancel); - return pb_cancel.IsCancelled(); + return pb_cancel_->IsCancelled(); +} + +bool +InferRequest::IsCancelledLastResponse() +{ + return pb_cancel_->IsCancelledInternalFlag(); } std::shared_ptr diff --git a/src/infer_request.h b/src/infer_request.h index a96545a3..926e50b9 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -34,6 +34,7 @@ #include "pb_tensor.h" #ifdef TRITON_PB_STUB +#include "pb_cancel.h" #include "response_sender.h" #endif @@ -108,6 +109,7 @@ class InferRequest { std::shared_ptr Exec(const bool is_decoupled); std::shared_ptr GetResponseSender(); bool IsCancelled(); + bool IsCancelledLastResponse(); #endif /// Save an Inference Request to shared memory. @@ -174,6 +176,7 @@ class InferRequest { std::unique_ptr parameters_shm_; #ifdef TRITON_PB_STUB + std::shared_ptr pb_cancel_; std::shared_ptr response_sender_; #endif }; diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc index 272babf0..4fdcda81 100644 --- a/src/pb_cancel.cc +++ b/src/pb_cancel.cc @@ -26,6 +26,8 @@ #include "pb_cancel.h" +#include "pb_stub.h" + namespace triton { namespace backend { namespace python { void @@ -52,11 +54,31 @@ PbCancel::ShmPayload() return cancel_shm_.data_.get(); } +bool +PbCancel::IsCancelledInternalFlag() +{ + return is_cancelled_; +} + bool PbCancel::IsCancelled() { std::unique_lock lk(mu_); - cv_.wait(lk, [this] { return updated_; }); + // The cancelled flag can only move from false to true, not the other way, so + // it is checked on each query until cancelled and then implicitly cached. 
+ if (is_cancelled_) { + return is_cancelled_; + } + if (!updating_) { + std::unique_ptr& stub = Stub::GetOrCreateInstance(); + if (!stub->StubToParentServiceActive()) { + LOG_ERROR << "Cannot communicate with parent service"; + return false; + } + stub->EnqueueIsCancelled(this); + updating_ = true; + } + cv_.wait(lk, [this] { return !updating_; }); return is_cancelled_; } @@ -66,7 +88,7 @@ PbCancel::ReportIsCancelled(bool is_cancelled) { std::lock_guard lk(mu_); is_cancelled_ = is_cancelled; - updated_ = true; + updating_ = false; } cv_.notify_all(); } diff --git a/src/pb_cancel.h b/src/pb_cancel.h index ec4954b3..4eb4a8ff 100644 --- a/src/pb_cancel.h +++ b/src/pb_cancel.h @@ -36,7 +36,7 @@ namespace triton { namespace backend { namespace python { class PbCancel { public: PbCancel(intptr_t response_factory_address, intptr_t request_address) - : updated_(false), response_factory_address_(response_factory_address), + : updating_(false), response_factory_address_(response_factory_address), request_address_(request_address), is_cancelled_(false) { } @@ -46,6 +46,8 @@ class PbCancel { bi::managed_external_buffer::handle_t ShmHandle(); IsCancelledMessage* ShmPayload(); + bool IsCancelledInternalFlag(); + bool IsCancelled(); void ReportIsCancelled(bool is_cancelled); @@ -54,7 +56,7 @@ class PbCancel { std::mutex mu_; std::condition_variable cv_; - bool updated_; + bool updating_; intptr_t response_factory_address_; intptr_t request_address_; diff --git a/src/pb_stub.cc b/src/pb_stub.cc index 87abe583..c379998d 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -771,10 +771,17 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr) std::to_string(response_size) + "\n"; throw PythonBackendException(err); } - for (auto& response : responses) { + for (size_t i = 0; i < response_size; i++) { + // If the model has checked for cancellation and the request is cancelled, + // replace returned type with a cancelled response. 
+ if (py_request_list[i].cast()->IsCancelledLastResponse()) { + responses[i] = std::make_shared( + std::vector>{}, + std::make_shared("", TRITONSERVER_ERROR_CANCELLED)); + } // Check the return type of execute function. - if (!py::isinstance(response)) { - std::string str = py::str(response.get_type()); + else if (!py::isinstance(responses[i])) { + std::string str = py::str(responses[i].get_type()); throw PythonBackendException( std::string("Expected an 'InferenceResponse' object in the execute " "function return list, found type '") + diff --git a/src/response_sender.cc b/src/response_sender.cc index 9390aa15..bd10d271 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -37,10 +37,11 @@ namespace triton { namespace backend { namespace python { ResponseSender::ResponseSender( intptr_t request_address, intptr_t response_factory_address, - std::unique_ptr& shm_pool) + std::unique_ptr& shm_pool, + const std::shared_ptr& pb_cancel) : request_address_(request_address), response_factory_address_(response_factory_address), shm_pool_(shm_pool), - closed_(false) + closed_(false), pb_cancel_(pb_cancel) { } @@ -188,14 +189,11 @@ ResponseSender::Send( bool ResponseSender::IsCancelled() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); - if (!stub->StubToParentServiceActive()) { - LOG_ERROR << "Cannot communicate with parent service"; - return false; + bool is_cancelled = pb_cancel_->IsCancelled(); + if (is_cancelled && !closed_) { + Send(nullptr, TRITONSERVER_RESPONSE_COMPLETE_FINAL); } - PbCancel pb_cancel(response_factory_address_, request_address_); - stub->EnqueueIsCancelled(&pb_cancel); - return pb_cancel.IsCancelled(); + return is_cancelled; } }}} // namespace triton::backend::python diff --git a/src/response_sender.h b/src/response_sender.h index 9b3509f1..1d12765a 100644 --- a/src/response_sender.h +++ b/src/response_sender.h @@ -27,6 +27,7 @@ #pragma once #include "infer_response.h" +#include "pb_cancel.h" #include "shm_manager.h" namespace 
triton { namespace backend { namespace python { @@ -35,7 +36,8 @@ class ResponseSender { public: ResponseSender( intptr_t request_address, intptr_t response_factory_address, - std::unique_ptr& shm_pool); + std::unique_ptr& shm_pool, + const std::shared_ptr& pb_cancel); void Send(std::shared_ptr response, const uint32_t flags); bool IsCancelled(); @@ -44,5 +46,6 @@ class ResponseSender { intptr_t response_factory_address_; std::unique_ptr& shm_pool_; bool closed_; + std::shared_ptr pb_cancel_; }; }}} // namespace triton::backend::python From 3a43ef3a9d0550935ae9857f3c3d54e73987ab35 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Tue, 3 Oct 2023 18:29:21 -0700 Subject: [PATCH 08/12] Documentation wording updates --- README.md | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index da7dcb2e..8bc057c1 100644 --- a/README.md +++ b/README.md @@ -508,10 +508,10 @@ Supported error codes: #### Request Cancellation Handling One or more requests may be cancelled by the client during execution. Starting -from 23.10, `request.is_cancelled()` returns up-to-date `True` or `False` on -whether the request is cancelled. If a request is cancelled, the model may -respond with any dummy object in place of the normal output tensors on the -request. For example: +from 23.10, `request.is_cancelled()` returns whether the request is cancelled. + +If a request is cancelled, the model may respond with any dummy object in place +of the normal output tensors on the request. For example: ```python import triton_python_backend_utils as pb_utils @@ -576,10 +576,9 @@ object, use InferenceResponseSender.send() to send response with the error back to the user. Starting from 23.10, request cancellation can be checked directly on the -`InferenceResponseSender` object, for example `response_sender.is_cancelled()`, -even after the request has gone out-of-scope. 
If -`response_sender.is_cancelled()` returned `True`, the -TRITONSERVER_RESPONSE_COMPLETE_FINAL flag is sent automatically. +`InferenceResponseSender` object using `response_sender.is_cancelled()`. If +`response_sender.is_cancelled()` returned `True`, then no further steps are +needed to be performed on this object. ##### Use Cases From e3c476e01b28cba3b8c774a755d15e82d6aee3b2 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Wed, 4 Oct 2023 11:54:18 -0700 Subject: [PATCH 09/12] Copyright year update --- src/response_sender.cc | 2 +- src/response_sender.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/src/response_sender.cc b/src/response_sender.cc index bd10d271..952c6af7 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/response_sender.h b/src/response_sender.h index 1d12765a..fda0d5d3 100644 --- a/src/response_sender.h +++ b/src/response_sender.h @@ -1,4 +1,4 @@ -// Copyright (c) 2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions From a5892b6984d7b8cf467a764e05b40df72df1eed2 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 5 Oct 2023 00:08:44 -0700 Subject: [PATCH 10/12] Rollback response sender auto close on cancel --- README.md | 4 +--- src/response_sender.cc | 6 +----- 2 files changed, 2 insertions(+), 8 deletions(-) diff --git a/README.md b/README.md index 8bc057c1..86abb10d 100644 --- a/README.md +++ b/README.md @@ -576,9 +576,7 @@ object, use InferenceResponseSender.send() to send response with the error back to the user. Starting from 23.10, request cancellation can be checked directly on the -`InferenceResponseSender` object using `response_sender.is_cancelled()`. If -`response_sender.is_cancelled()` returned `True`, then no further steps are -needed to be performed on this object. +`InferenceResponseSender` object using `response_sender.is_cancelled()`. 
##### Use Cases diff --git a/src/response_sender.cc b/src/response_sender.cc index 952c6af7..1e2e9b50 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -189,11 +189,7 @@ ResponseSender::Send( bool ResponseSender::IsCancelled() { - bool is_cancelled = pb_cancel_->IsCancelled(); - if (is_cancelled && !closed_) { - Send(nullptr, TRITONSERVER_RESPONSE_COMPLETE_FINAL); - } - return is_cancelled; + return pb_cancel_->IsCancelled(); } }}} // namespace triton::backend::python From 3de5922c48ece05b5d55862063e67ab3a22c97f5 Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 5 Oct 2023 14:30:30 -0700 Subject: [PATCH 11/12] Rollback non-decoupled any response on cancel --- README.md | 11 ++++------- src/infer_request.cc | 6 ------ src/infer_request.h | 1 - src/pb_cancel.cc | 6 ------ src/pb_cancel.h | 2 -- src/pb_stub.cc | 13 +++---------- 6 files changed, 7 insertions(+), 32 deletions(-) diff --git a/README.md b/README.md index 86abb10d..6a45a619 100644 --- a/README.md +++ b/README.md @@ -508,10 +508,8 @@ Supported error codes: #### Request Cancellation Handling One or more requests may be cancelled by the client during execution. Starting -from 23.10, `request.is_cancelled()` returns whether the request is cancelled. - -If a request is cancelled, the model may respond with any dummy object in place -of the normal output tensors on the request. For example: +from 23.10, `request.is_cancelled()` returns whether the request is cancelled or +not. For example: ```python import triton_python_backend_utils as pb_utils @@ -524,7 +522,8 @@ class TritonPythonModel: for request in requests: if request.is_cancelled(): - responses.append(None) + responses.append(pb_utils.InferenceResponse( + error=pb_utils.TritonError("Message", pb_utils.TritonError.CANCELLED))) else: ... @@ -600,8 +599,6 @@ full power of what can be achieved from decoupled API. 
Read [Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md) for more details on how to host a decoupled model. -##### - ##### Known Issues * Currently, decoupled Python models can not make async infer requests. diff --git a/src/infer_request.cc b/src/infer_request.cc index e148b062..e9d243f1 100644 --- a/src/infer_request.cc +++ b/src/infer_request.cc @@ -410,12 +410,6 @@ InferRequest::IsCancelled() return pb_cancel_->IsCancelled(); } -bool -InferRequest::IsCancelledLastResponse() -{ - return pb_cancel_->IsCancelledInternalFlag(); -} - std::shared_ptr InferRequest::GetResponseSender() { diff --git a/src/infer_request.h b/src/infer_request.h index 926e50b9..bc6a2acf 100644 --- a/src/infer_request.h +++ b/src/infer_request.h @@ -109,7 +109,6 @@ class InferRequest { std::shared_ptr Exec(const bool is_decoupled); std::shared_ptr GetResponseSender(); bool IsCancelled(); - bool IsCancelledLastResponse(); #endif /// Save an Inference Request to shared memory. 
diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc index 4fdcda81..4c9b926b 100644 --- a/src/pb_cancel.cc +++ b/src/pb_cancel.cc @@ -54,12 +54,6 @@ PbCancel::ShmPayload() return cancel_shm_.data_.get(); } -bool -PbCancel::IsCancelledInternalFlag() -{ - return is_cancelled_; -} - bool PbCancel::IsCancelled() { diff --git a/src/pb_cancel.h b/src/pb_cancel.h index 4eb4a8ff..3ebf07b5 100644 --- a/src/pb_cancel.h +++ b/src/pb_cancel.h @@ -46,8 +46,6 @@ class PbCancel { bi::managed_external_buffer::handle_t ShmHandle(); IsCancelledMessage* ShmPayload(); - bool IsCancelledInternalFlag(); - bool IsCancelled(); void ReportIsCancelled(bool is_cancelled); diff --git a/src/pb_stub.cc b/src/pb_stub.cc index c379998d..87abe583 100644 --- a/src/pb_stub.cc +++ b/src/pb_stub.cc @@ -771,17 +771,10 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr) std::to_string(response_size) + "\n"; throw PythonBackendException(err); } - for (size_t i = 0; i < response_size; i++) { - // If the model has checked for cancellation and the request is cancelled, - // replace returned type with a cancelled response. - if (py_request_list[i].cast()->IsCancelledLastResponse()) { - responses[i] = std::make_shared( - std::vector>{}, - std::make_shared("", TRITONSERVER_ERROR_CANCELLED)); - } + for (auto& response : responses) { // Check the return type of execute function. 
- else if (!py::isinstance(responses[i])) { - std::string str = py::str(responses[i].get_type()); + if (!py::isinstance(response)) { + std::string str = py::str(response.get_type()); throw PythonBackendException( std::string("Expected an 'InferenceResponse' object in the execute " "function return list, found type '") + From 09e358adab39322dbf807d00a9c993f16c375edf Mon Sep 17 00:00:00 2001 From: kthui <18255193+kthui@users.noreply.github.com> Date: Thu, 5 Oct 2023 16:29:49 -0700 Subject: [PATCH 12/12] Decoupled final flag docs update --- README.md | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 6a45a619..4cb9a960 100644 --- a/README.md +++ b/README.md @@ -575,7 +575,9 @@ object, use InferenceResponseSender.send() to send response with the error back to the user. Starting from 23.10, request cancellation can be checked directly on the -`InferenceResponseSender` object using `response_sender.is_cancelled()`. +`InferenceResponseSender` object using `response_sender.is_cancelled()`. Sending +the TRITONSERVER_RESPONSE_COMPLETE_FINAL flag at the end of response is still +needed even if the request is cancelled. ##### Use Cases