triton-inference-server · kthui · Oct 6, 2023 · Sep 26, 2023 · Sep 28, 2023 · Sep 29, 2023
diff --git a/CMakeLists.txt b/CMakeLists.txt
@@ -208,6 +208,8 @@ set(
   src/pb_stub.cc
   src/pb_response_iterator.h
   src/pb_response_iterator.cc
+  src/pb_cancel.cc
+  src/pb_cancel.h
 )
 
 list(APPEND

diff --git a/README.md b/README.md
@@ -46,6 +46,7 @@ any C++ code.
     - [`execute`](#execute)
       - [Default Mode](#default-mode)
       - [Error Handling](#error-handling)
+      - [Request Cancellation Handling](#request-cancellation-handling)
       - [Decoupled mode](#decoupled-mode)
         - [Use Cases](#use-cases)
         - [Known Issues](#known-issues)
@@ -502,6 +503,37 @@ Supported error codes:
 * `pb_utils.TritonError.UNAVAILABLE`
 * `pb_utils.TritonError.UNSUPPORTED`
 * `pb_utils.TritonError.ALREADY_EXISTS`
+* `pb_utils.TritonError.CANCELLED` (since 23.10)
+
+#### Request Cancellation Handling
+
+One or more requests may be cancelled by the client during execution. Starting
+from 23.10, `request.is_cancelled()` returns whether the request is cancelled.
+
+If a request is cancelled, the model may respond with any dummy object in place
+of the normal output tensors on the request. For example:
+
+```python
+import triton_python_backend_utils as pb_utils
+
+class TritonPythonModel:
+    ...
+
+    def execute(self, requests):
+        responses = []
+
+        for request in requests:
+            if request.is_cancelled():
+                responses.append(None)
+            else:
+                ...
+
+        return responses
+```
+
+Although checking for request cancellation is optional, it is recommended to
+check for cancellation at strategic stages of request execution, so that
+execution can be terminated early when its response is no longer needed.
 
 #### Decoupled mode
 
@@ -543,6 +575,11 @@ request. After setting errors for an pb_utils.InferenceResponse
 object, use InferenceResponseSender.send() to send response with the
 error back to the user.
 
+Starting from 23.10, request cancellation can be checked directly on the
+`InferenceResponseSender` object using `response_sender.is_cancelled()`. If
+`response_sender.is_cancelled()` returns `True`, no further steps need to be
+performed on this object.
+
 ##### Use Cases
 
 The decoupled mode is powerful and supports various other use cases:
@@ -565,6 +602,8 @@ full power of what can be achieved from decoupled API. Read
 [Decoupled Backends and Models](https://github.com/triton-inference-server/server/blob/main/docs/user_guide/decoupled_models.md)
 for more details on how to host a decoupled model.
 
+
+
 ##### Known Issues
 
 * Currently, decoupled Python models can not make async infer requests.

diff --git a/src/infer_request.cc b/src/infer_request.cc
@@ -71,9 +71,11 @@ InferRequest::InferRequest(
   inputs_ = inputs;
   requested_output_names_ = requested_output_names;
 #ifdef TRITON_PB_STUB
+  pb_cancel_ =
+      std::make_shared<PbCancel>(response_factory_address_, request_address_);
   response_sender_ = std::make_shared<ResponseSender>(
       request_address_, response_factory_address_,
-      Stub::GetOrCreateInstance()->SharedMemory());
+      Stub::GetOrCreateInstance()->SharedMemory(), pb_cancel_);
 #endif
 }
 
@@ -379,9 +381,11 @@ InferRequest::InferRequest(
   trace_ = infer_request_shm_ptr_->trace;
 
 #ifdef TRITON_PB_STUB
+  pb_cancel_ =
+      std::make_shared<PbCancel>(response_factory_address_, request_address_);
   response_sender_ = std::make_shared<ResponseSender>(
       request_address_, response_factory_address_,
-      Stub::GetOrCreateInstance()->SharedMemory());
+      Stub::GetOrCreateInstance()->SharedMemory(), pb_cancel_);
 #endif
 }
 
@@ -400,6 +404,18 @@ InferRequest::DeleteResponseFactory()
 #endif
 
 #ifdef TRITON_PB_STUB
+bool
+InferRequest::IsCancelled()
+{
+  return pb_cancel_->IsCancelled();  // may block; see PbCancel::IsCancelled
+}
+
+bool
+InferRequest::IsCancelledLastResponse()
+{
+  return pb_cancel_->IsCancelledInternalFlag();  // cached flag, no new query
+}
+
 std::shared_ptr<ResponseSender>
 InferRequest::GetResponseSender()
 {

diff --git a/src/infer_request.h b/src/infer_request.h
@@ -34,6 +34,7 @@
 #include "pb_tensor.h"
 
 #ifdef TRITON_PB_STUB
+#include "pb_cancel.h"
 #include "response_sender.h"
 #endif
 
@@ -107,6 +108,8 @@ class InferRequest {
 #ifdef TRITON_PB_STUB
   std::shared_ptr<InferResponse> Exec(const bool is_decoupled);
   std::shared_ptr<ResponseSender> GetResponseSender();
+  bool IsCancelled();
+  bool IsCancelledLastResponse();
 #endif
 
   /// Save an Inference Request to shared memory.
@@ -173,6 +176,7 @@ class InferRequest {
   std::unique_ptr<PbString> parameters_shm_;
 
 #ifdef TRITON_PB_STUB
+  std::shared_ptr<PbCancel> pb_cancel_;
   std::shared_ptr<ResponseSender> response_sender_;
 #endif
 };

diff --git a/src/ipc_message.h b/src/ipc_message.h
@@ -62,7 +62,8 @@ typedef enum PYTHONSTUB_commandtype_enum {
   PYTHONSTUB_MetricRequestSet,
   PYTHONSTUB_LoadModelRequest,
   PYTHONSTUB_UnloadModelRequest,
-  PYTHONSTUB_ModelReadinessRequest
+  PYTHONSTUB_ModelReadinessRequest,
+  PYTHONSTUB_IsRequestCancelled
 } PYTHONSTUB_CommandType;
 
 ///

diff --git a/src/pb_cancel.cc b/src/pb_cancel.cc
@@ -0,0 +1,96 @@
+// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#include "pb_cancel.h"
+
+#include "pb_stub.h"
+
+namespace triton { namespace backend { namespace python {
+
+void
+PbCancel::SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool)
+{  // Constructs an IsCancelledMessage via shm_pool and fills in its fields.
+  cancel_shm_ = shm_pool->Construct<IsCancelledMessage>();
+  new (&(cancel_shm_.data_->mu)) bi::interprocess_mutex;  // build in place
+  new (&(cancel_shm_.data_->cv)) bi::interprocess_condition;  // build in place
+  cancel_shm_.data_->waiting_on_stub = false;
+  cancel_shm_.data_->response_factory_address = response_factory_address_;
+  cancel_shm_.data_->request_address = request_address_;
+  cancel_shm_.data_->is_cancelled = is_cancelled_;  // copy of the cached flag
+}
+
+bi::managed_external_buffer::handle_t
+PbCancel::ShmHandle()
+{
+  return cancel_shm_.handle_;  // cancel_shm_ is set by SaveToSharedMemory()
+}
+
+IsCancelledMessage*
+PbCancel::ShmPayload()
+{
+  return cancel_shm_.data_.get();  // raw pointer to the message in cancel_shm_
+}
+
+bool
+PbCancel::IsCancelledInternalFlag()
+{
+  return is_cancelled_;  // NOTE(review): read without mu_; confirm callers
+}
+
+bool
+PbCancel::IsCancelled()
+{
+  std::unique_lock<std::mutex> lk(mu_);
+  // The cancelled flag can only move from false to true, not the other way, so
+  // it is checked on each query until cancelled and then implicitly cached.
+  if (is_cancelled_) {
+    return is_cancelled_;
+  }
+  if (!updating_) {  // no query in flight, so this caller enqueues one
+    std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
+    if (!stub->StubToParentServiceActive()) {
+      LOG_ERROR << "Cannot communicate with parent service";
+      return false;  // state unknown; reported as not cancelled
+    }
+    stub->EnqueueIsCancelled(this);
+    updating_ = true;  // cleared by ReportIsCancelled()
+  }
+  cv_.wait(lk, [this] { return !updating_; });  // block until result reported
+  return is_cancelled_;
+}
+
+void
+PbCancel::ReportIsCancelled(bool is_cancelled)
+{
+  {  // inner scope releases mu_ before the waiters are notified
+    std::lock_guard<std::mutex> lk(mu_);
+    is_cancelled_ = is_cancelled;
+    updating_ = false;  // satisfies the wait predicate in IsCancelled()
+  }
+  cv_.notify_all();
+}
+
+}}}  // namespace triton::backend::python
diff --git a/src/pb_cancel.h b/src/pb_cancel.h
@@ -0,0 +1,66 @@
+// Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+//
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions
+// are met:
+//  * Redistributions of source code must retain the above copyright
+//    notice, this list of conditions and the following disclaimer.
+//  * Redistributions in binary form must reproduce the above copyright
+//    notice, this list of conditions and the following disclaimer in the
+//    documentation and/or other materials provided with the distribution.
+//  * Neither the name of NVIDIA CORPORATION nor the names of its
+//    contributors may be used to endorse or promote products derived
+//    from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+// PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#pragma once
+
+#include <condition_variable>
+#include <mutex>
+
+#include "pb_utils.h"
+
+namespace triton { namespace backend { namespace python {
+
+class PbCancel {  // cancellation state for one request: cached flag + message
+ public:
+  PbCancel(intptr_t response_factory_address, intptr_t request_address)
+      : updating_(false), response_factory_address_(response_factory_address),
+        request_address_(request_address), is_cancelled_(false)
+  {
+  }
+  DISALLOW_COPY_AND_ASSIGN(PbCancel);
+
+  void SaveToSharedMemory(std::unique_ptr<SharedMemoryManager>& shm_pool);
+  bi::managed_external_buffer::handle_t ShmHandle();
+  IsCancelledMessage* ShmPayload();
+
+  bool IsCancelledInternalFlag();  // returns is_cancelled_ as last stored
+
+  bool IsCancelled();  // may enqueue a query and block until it is reported
+  void ReportIsCancelled(bool is_cancelled);  // stores result, wakes waiters
+
+ private:
+  AllocatedSharedMemory<IsCancelledMessage> cancel_shm_;
+
+  std::mutex mu_;  // held by IsCancelled() and ReportIsCancelled()
+  std::condition_variable cv_;
+  bool updating_;  // true while an enqueued query awaits ReportIsCancelled()
+
+  intptr_t response_factory_address_;
+  intptr_t request_address_;
+  bool is_cancelled_;
+};
+
+}}};  // namespace triton::backend::python