diff --git a/src/response_sender.cc b/src/response_sender.cc index 1831601f..0a88fb6b 100644 --- a/src/response_sender.cc +++ b/src/response_sender.cc @@ -62,16 +62,14 @@ ResponseSender::ResponseSender( response_factory_address_(response_factory_address), is_decoupled_(is_decoupled), requested_output_names_(requested_output_names), shm_pool_(shm_pool), - pb_cancel_(pb_cancel), closed_(false), number_of_response_sent_(0) + pb_cancel_(pb_cancel), closed_(false), number_of_response_sent_(0), + response_factory_deleted_(false) { } ResponseSender::~ResponseSender() { - std::unique_ptr& stub = Stub::GetOrCreateInstance(); - stub->EnqueueCleanupId( - reinterpret_cast(response_factory_address_), - PYTHONSTUB_DecoupledResponseFactoryCleanup); + DeleteResponseFactory(); } void @@ -248,6 +246,10 @@ ResponseSender::Send( "An error occurred while sending a response."); } } + + if (flags == TRITONSERVER_RESPONSE_COMPLETE_FINAL) { + DeleteResponseFactory(); + } } bool @@ -263,4 +265,16 @@ ResponseSender::Close() closed_ = true; } +void +ResponseSender::DeleteResponseFactory() +{ + bool already_deleted = response_factory_deleted_.exchange(true); + if (!already_deleted) { + std::unique_ptr& stub = Stub::GetOrCreateInstance(); + stub->EnqueueCleanupId( + reinterpret_cast(response_factory_address_), + PYTHONSTUB_DecoupledResponseFactoryCleanup); + } +} + }}} // namespace triton::backend::python diff --git a/src/response_sender.h b/src/response_sender.h index f274f5b4..69f416c2 100644 --- a/src/response_sender.h +++ b/src/response_sender.h @@ -26,6 +26,7 @@ #pragma once +#include #include #include "infer_response.h" @@ -52,6 +53,7 @@ class ResponseSender { private: void UpdateStateAndCounters( const std::shared_ptr& response, const uint32_t flags); + void DeleteResponseFactory(); intptr_t request_address_; intptr_t response_factory_address_; @@ -63,5 +65,7 @@ class ResponseSender { std::mutex mu_; bool closed_; size_t number_of_response_sent_; + + std::atomic response_factory_deleted_; }; }}} // namespace triton::backend::python