diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.h b/src/c++/perf_analyzer/client_backend/openai/http_client.h index 3caa94992..c6acfd524 100644 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.h @@ -93,7 +93,7 @@ struct HttpSslOptions { std::string key; }; -// an HttpRequest object represents the context of a HTTP transaction. currently +// HttpRequest object representing the context of an HTTP transaction. Currently // it is also designed to be the placeholder for response data, but how the // response is stored can be revisited later. // 'completion_callback' doesn't transfer ownership of HttpRequest, caller must @@ -126,7 +126,7 @@ class HttpRequest { size_t total_input_byte_size_{0}; // HTTP response code for the inference request - long http_code_{200}; + uint32_t http_code_{200}; std::function completion_callback_{nullptr}; @@ -137,7 +137,7 @@ class HttpRequest { protected: const bool verbose_{false}; - // The pointers to the input data. + // Pointers to the input data. 
std::deque> data_buffers_; }; diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc index 0b7c85c00..f83c3976b 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -69,7 +69,8 @@ ChatCompletionRequest::SendResponse(bool is_final, bool is_null) ChatCompletionClient::ChatCompletionClient( const std::string& url, bool verbose, const HttpSslOptions& ssl_options) - : HttpClient(url, verbose, ssl_options) + : HttpClient( + std::string(url + "/v1/chat/completions"), verbose, ssl_options) { } @@ -149,8 +150,7 @@ ChatCompletionClient::ResponseHandler( Error ChatCompletionClient::AsyncInfer( std::function callback, - std::string& serialized_request_body, - const std::string& request_id) + std::string& serialized_request_body, const std::string& request_id) { if (callback == nullptr) { return Error( @@ -167,17 +167,17 @@ ChatCompletionClient::AsyncInfer( UpdateInferStat(request->timer_); }; std::unique_ptr request(new ChatCompletionRequest( - std::move(completion_callback), std::move(callback), request_id, verbose_)); + std::move(completion_callback), std::move(callback), request_id, + verbose_)); auto raw_request = static_cast(request.get()); raw_request->timer_.CaptureTimestamp( triton::client::RequestTimers::Kind::REQUEST_START); request->AddInput( reinterpret_cast(serialized_request_body.data()), serialized_request_body.size()); - std::string request_uri(url_ + "/v1/chat/completions"); CURL* multi_easy_handle = curl_easy_init(); - Error err = PreRunProcessing(multi_easy_handle, request_uri, raw_request); + Error err = PreRunProcessing(multi_easy_handle, raw_request); if (!err.IsOk()) { curl_easy_cleanup(multi_easy_handle); return err; @@ -191,9 +191,9 @@ ChatCompletionClient::AsyncInfer( Error ChatCompletionClient::PreRunProcessing( - CURL* curl, std::string& request_uri, ChatCompletionRequest* 
request) + CURL* curl, ChatCompletionRequest* request) { - curl_easy_setopt(curl, CURLOPT_URL, request_uri.c_str()); + curl_easy_setopt(curl, CURLOPT_URL, url_.c_str()); curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); curl_easy_setopt(curl, CURLOPT_POST, 1L); curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L); diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h index 38d0f8f04..bff2d299f 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.h @@ -117,8 +117,7 @@ class ChatCompletionRequest : public HttpRequest { ChatCompletionRequest( std::function&& completion_callback, std::function&& response_callback, - const std::string& request_id, - const bool verbose = false) + const std::string& request_id, const bool verbose = false) : HttpRequest(std::move(completion_callback), verbose), response_callback_(std::move(response_callback)), request_id_(request_id) @@ -137,7 +136,6 @@ class ChatCompletionClient : public HttpClient { virtual ~ChatCompletionClient() = default; /// Create a client that can be used to communicate with the server. - /// \param client Returns a new InferenceServerHttpClient object. /// \param server_url The inference server name, port, optional /// scheme and optional base path in the following format: /// host:port/. @@ -149,7 +147,6 @@ class ChatCompletionClient : public HttpClient { /// The use of SSL/TLS depends entirely on the server endpoint. /// These options will be ignored if the server_url does not /// expose `https://` scheme. - /// \return Error object indicating success or failure. ChatCompletionClient( const std::string& server_url, bool verbose = false, const HttpSslOptions& ssl_options = HttpSslOptions()); @@ -159,8 +156,7 @@ class ChatCompletionClient : public HttpClient { /// with a OpenAI-compatible server in both streaming and non-streaming case. 
Error AsyncInfer( std::function callback, - std::string& serialized_request_body, - const std::string& request_id); + std::string& serialized_request_body, const std::string& request_id); const InferStat& ClientInferStat() { return infer_stat_; } @@ -169,8 +165,7 @@ class ChatCompletionClient : public HttpClient { private: // setup curl handle - Error PreRunProcessing( - CURL* curl, std::string& request_uri, ChatCompletionRequest* request); + Error PreRunProcessing(CURL* curl, ChatCompletionRequest* request); static size_t ResponseHandler( void* contents, size_t size, size_t nmemb, void* userp); diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc index 968973d42..9f62beb29 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc @@ -66,8 +66,8 @@ OpenAiClientBackend::AsyncInfer( auto raw_input = dynamic_cast(inputs[0]); raw_input->PrepareForRequest(); - RETURN_IF_CB_ERROR( - http_client_->AsyncInfer(callback, raw_input->DataString(), options.request_id_)); + RETURN_IF_CB_ERROR(http_client_->AsyncInfer( + callback, raw_input->DataString(), options.request_id_)); return Error::Success; } diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h index 9ccf0945c..0c192cfad 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h @@ -51,9 +51,10 @@ class OpenAiInferInput : public InferInput { Error Reset() override; /// See InferInput::AppendRaw() Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - /// Resets the heads to start providing data from the beginning. + /// Prepare the input to be in the form expected by an OpenAI client, + /// must be called before accessing the data. 
Error PrepareForRequest(); - /// Get the next chunk of data if available. + /// Get the contiguous data as a string. std::string& DataString() { return data_str_; } private: