diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc
index 995fc28fd..9b167fae1 100644
--- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc
+++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc
@@ -63,13 +63,6 @@ namespace openai {
 void
 ChatCompletionRequest::SendResponse(bool is_final, bool is_null)
 {
-  // if final response has already been sent
-  // due to detecting the [DONE]
-  // ignore final response due to request completion
-  if (final_response_sent_) {
-    return;
-  }
-
   final_response_sent_ = is_final;
   response_callback_(new ChatCompletionResult(
       http_code_, std::move(response_buffer_), is_final, is_null, request_id_));
@@ -182,9 +175,11 @@ ChatCompletionClient::AsyncInfer(
     UpdateInferStat(request->timer_);
 
     // Send final response on request completion
-    // Ignored if final response has already been sent
-    // (in the case of seeing [DONE] in streaming case)
-    request->SendResponse(true /* is_final */, false /* is_null */);
+    // if it has not already been sent.
+    // (e.g. in the case of seeing [DONE] in streaming case)
+    if (!request->IsFinalResponseSent()) {
+      request->SendResponse(true /* is_final */, false /* is_null */);
+    }
   };
   std::unique_ptr request(new ChatCompletionRequest(
       std::move(completion_callback), std::move(callback), request_id,
diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h
index e63728fc4..00ccbd5fa 100644
--- a/src/c++/perf_analyzer/client_backend/openai/openai_client.h
+++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.h
@@ -121,6 +121,8 @@ class ChatCompletionRequest : public HttpRequest {
         request_id_(request_id)
   {
   }
+  // True when the most recent SendResponse() call was flagged as final.
+  bool IsFinalResponseSent() const { return final_response_sent_; }
   void SendResponse(bool is_final, bool is_null);
   bool is_stream_{false};
   std::function response_callback_{nullptr};