From c426243f1e38a66d53929a33fa5a830580b9c867 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 9 May 2024 18:34:43 -0700
Subject: [PATCH 1/5] Testing

---
 src/python_be.cc       | 8 ++++++++
 src/response_sender.cc | 9 +++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/src/python_be.cc b/src/python_be.cc
index b95fb715..c08e06b0 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -1303,6 +1303,14 @@ ModelInstanceState::ResponseSendDecoupled(
     SetErrorForResponseSendMessage(
         send_message_payload, WrapTritonErrorInSharedPtr(error), error_message);
   }
+
+  if (send_message_payload->flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) {
+    // Delete response factory
+    std::unique_ptr<
+        TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
+        response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
+            send_message_payload->response_factory_address));
+  }
 }
 
 TRITONSERVER_Error*
diff --git a/src/response_sender.cc b/src/response_sender.cc
index 94e3f0c8..49836732 100644
--- a/src/response_sender.cc
+++ b/src/response_sender.cc
@@ -47,10 +47,11 @@ ResponseSender::ResponseSender(
 
 ResponseSender::~ResponseSender()
 {
-  std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
-  stub->EnqueueCleanupId(
-      reinterpret_cast<void*>(response_factory_address_),
-      PYTHONSTUB_DecoupledResponseFactoryCleanup);
+  // std::cerr << "===== ResponseSender::~ResponseSender() =====" << std::endl;
+  // std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
+  // stub->EnqueueCleanupId(
+  //     reinterpret_cast<void*>(response_factory_address_),
+  //     PYTHONSTUB_DecoupledResponseFactoryCleanup);
 }
 
 void

From 1c7ff5acc36255318816639dd673868ecb30e038 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Thu, 16 May 2024 17:19:22 -0700
Subject: [PATCH 2/5] Clean up response_factory when the final flag is set

---
 src/pb_utils.h         |  1 +
 src/python_be.cc       |  1 +
 src/response_sender.cc | 18 +++++++++++-------
 src/response_sender.h  | 11 ++++++++++-
 4 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/src/pb_utils.h b/src/pb_utils.h
index 1a6c2d8b..f302b0be 100644
--- a/src/pb_utils.h
+++ b/src/pb_utils.h
@@ -225,6 +225,7 @@ struct ResponseSenderBase {
   bi::managed_external_buffer::handle_t error;
   intptr_t request_address;
   intptr_t response_factory_address;
+  bool is_response_factory_cleaned;
 };
 
 struct ResponseSendMessage : ResponseSenderBase {
diff --git a/src/python_be.cc b/src/python_be.cc
index c08e06b0..f82ba78b 100644
--- a/src/python_be.cc
+++ b/src/python_be.cc
@@ -1310,6 +1310,7 @@ ModelInstanceState::ResponseSendDecoupled(
         TRITONBACKEND_ResponseFactory, backend::ResponseFactoryDeleter>
         response_factory(reinterpret_cast<TRITONBACKEND_ResponseFactory*>(
             send_message_payload->response_factory_address));
+    send_message_payload->is_response_factory_cleaned = true;
   }
 }
 
diff --git a/src/response_sender.cc b/src/response_sender.cc
index 49836732..13feed80 100644
--- a/src/response_sender.cc
+++ b/src/response_sender.cc
@@ -1,4 +1,4 @@
-// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -41,17 +41,18 @@ ResponseSender::ResponseSender(
     const std::shared_ptr<PbCancel>& pb_cancel)
     : request_address_(request_address),
       response_factory_address_(response_factory_address), shm_pool_(shm_pool),
-      closed_(false), pb_cancel_(pb_cancel)
+      closed_(false), pb_cancel_(pb_cancel), is_response_factory_cleaned_(false)
 {
 }
 
 ResponseSender::~ResponseSender()
 {
-  // std::cerr << "===== ResponseSender::~ResponseSender() =====" << std::endl;
-  // std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
-  // stub->EnqueueCleanupId(
-  //     reinterpret_cast<void*>(response_factory_address_),
-  //     PYTHONSTUB_DecoupledResponseFactoryCleanup);
+  if (!is_response_factory_cleaned_) {
+    std::unique_ptr<Stub>& stub = Stub::GetOrCreateInstance();
+    stub->EnqueueCleanupId(
+        reinterpret_cast<void*>(response_factory_address_),
+        PYTHONSTUB_DecoupledResponseFactoryCleanup);
+  }
 }
 
 void
@@ -189,6 +190,9 @@ ResponseSender::Send(
     }
   }
 
+  is_response_factory_cleaned_ =
+      send_message_payload->is_response_factory_cleaned;
+
   if (send_message_payload->has_error) {
     if (send_message_payload->is_error_set) {
       std::unique_ptr<PbString> error = PbString::LoadFromSharedMemory(
diff --git a/src/response_sender.h b/src/response_sender.h
index d29a6ab6..2785aaa2 100644
--- a/src/response_sender.h
+++ b/src/response_sender.h
@@ -1,4 +1,4 @@
-// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -46,7 +46,16 @@ class ResponseSender {
   intptr_t request_address_;
   intptr_t response_factory_address_;
   std::unique_ptr<SharedMemoryManager>& shm_pool_;
+  // The flag to indicate if the response sender is closed. It is set to true
+  // once the TRITONSERVER_RESPONSE_COMPLETE_FINAL flag is set, meaning that the
+  // response_sender should not be used anymore. This flag is separate from the
+  // `is_response_factory_cleaned_` flag because errors might occur after
+  // complete_final flag is set but before the response_factory gets cleaned up.
   bool closed_;
   std::shared_ptr<PbCancel> pb_cancel_;
+  // The flag to indicate if the response_factory is already cleaned up in the
+  // python backend side. If not, the response_factory will be cleaned up in the
+  // destructor.
+  bool is_response_factory_cleaned_;
 };
 }}}  // namespace triton::backend::python

From a593f2fd8564c27054f2d9c52a42963f0ecd792f Mon Sep 17 00:00:00 2001
From: krishung5
Date: Mon, 20 May 2024 20:12:35 -0700
Subject: [PATCH 3/5] Initialize variable

---
 src/response_sender.cc | 1 +
 1 file changed, 1 insertion(+)

diff --git a/src/response_sender.cc b/src/response_sender.cc
index 13feed80..868113fa 100644
--- a/src/response_sender.cc
+++ b/src/response_sender.cc
@@ -114,6 +114,7 @@ ResponseSender::Send(
   send_message_payload->has_error = false;
   send_message_payload->is_error_set = false;
   send_message_payload->flags = flags;
+  send_message_payload->is_response_factory_cleaned = false;
 
   std::unique_ptr<IPCMessage> ipc_message =
       IPCMessage::Create(shm_pool_, false /* inline_response */);

From 9e7e1bda58491ccba9ea075d6001beb54180e87e Mon Sep 17 00:00:00 2001
From: krishung5
Date: Tue, 21 May 2024 15:16:15 -0700
Subject: [PATCH 4/5] Update doc

---
 README.md | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/README.md b/README.md
index 89b9213e..9eb64110 100644
--- a/README.md
+++ b/README.md
@@ -599,6 +599,11 @@ Starting from 23.10, request cancellation can be checked directly on the
 the TRITONSERVER_RESPONSE_COMPLETE_FINAL flag at the end of response is still
 needed even the request is cancelled.
 
+*Note:*: If `response_sender` is reused across multiple requests, the model must
+ensure that the `response_sender` is properly cleaned up when the
+`pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL` flag has not been sent for the
+request and the request is rescheduled or cancelled.
+
 ##### Use Cases
 
 The decoupled mode is powerful and supports various other use cases:

From 59cbb366ea7a2fb439d3151cc8acc1621d359354 Mon Sep 17 00:00:00 2001
From: krishung5
Date: Tue, 21 May 2024 17:08:02 -0700
Subject: [PATCH 5/5] Revert

---
 README.md | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/README.md b/README.md
index 9eb64110..89b9213e 100644
--- a/README.md
+++ b/README.md
@@ -599,11 +599,6 @@ Starting from 23.10, request cancellation can be checked directly on the
 the TRITONSERVER_RESPONSE_COMPLETE_FINAL flag at the end of response is still
 needed even the request is cancelled.
 
-*Note:*: If `response_sender` is reused across multiple requests, the model must
-ensure that the `response_sender` is properly cleaned up when the
-`pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL` flag has not been sent for the
-request and the request is rescheduled or cancelled.
-
 ##### Use Cases
 
 The decoupled mode is powerful and supports various other use cases:
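
For reference, the cleanup path these patches change is driven by a decoupled Python model closing its response sender with the final flag. Below is a minimal sketch of such a model; the tensor names `IN`/`OUT` and the pass-through logic are assumptions for illustration only and are not part of the patch series.

```python
import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    """Sketch of a decoupled model; tensor names "IN"/"OUT" are assumed."""

    def execute(self, requests):
        for request in requests:
            response_sender = request.get_response_sender()

            # Send one response for the request (pass-through in this sketch).
            in_tensor = pb_utils.get_input_tensor_by_name(request, "IN")
            out_tensor = pb_utils.Tensor("OUT", in_tensor.as_numpy())
            response_sender.send(
                pb_utils.InferenceResponse(output_tensors=[out_tensor]))

            # Closing the sender with the final flag is what lets the backend
            # delete the TRITONBACKEND_ResponseFactory eagerly (patches 1-2);
            # otherwise the factory is only released when the sender is
            # destructed on the stub side.
            response_sender.send(
                flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)

        # Decoupled models return None; all responses go through the sender.
        return None
```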