Skip to content

Commit

Permalink
Address comment
Browse files Browse the repository at this point in the history
  • Loading branch information
GuanLuo authored and tgerdesnv committed Mar 4, 2024
1 parent 5add529 commit df4464f
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 23 deletions.
6 changes: 3 additions & 3 deletions src/c++/perf_analyzer/client_backend/openai/http_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ struct HttpSslOptions {
std::string key;
};

// an HttpRequest object represents the context of a HTTP transaction. currently
// HttpRequest object representing the context of an HTTP transaction. Currently
// it is also designed to be the placeholder for response data, but how the
// response is stored can be revisited later.
// 'completion_callback' doesn't transfer ownership of HttpRequest, caller must
Expand Down Expand Up @@ -126,7 +126,7 @@ class HttpRequest {
size_t total_input_byte_size_{0};

// HTTP response code for the inference request
long http_code_{200};
uint32_t http_code_{200};

std::function<void(HttpRequest*)> completion_callback_{nullptr};

Expand All @@ -137,7 +137,7 @@ class HttpRequest {
protected:
const bool verbose_{false};

// The pointers to the input data.
// Pointers to the input data.
std::deque<std::pair<uint8_t*, size_t>> data_buffers_;
};

Expand Down
16 changes: 8 additions & 8 deletions src/c++/perf_analyzer/client_backend/openai/openai_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ ChatCompletionRequest::SendResponse(bool is_final, bool is_null)

// Construct a client bound to the OpenAI chat-completions endpoint.
// \param url Base server URL; the "/v1/chat/completions" route is appended
//            here once so request-time code can use url_ directly.
// \param verbose Enable verbose logging in the underlying HttpClient.
// \param ssl_options SSL/TLS options forwarded to the HttpClient base.
ChatCompletionClient::ChatCompletionClient(
    const std::string& url, bool verbose, const HttpSslOptions& ssl_options)
    // 'url + "/v1/chat/completions"' already yields a std::string, so the
    // extra std::string(...) temporary the original wrapped it in is redundant.
    : HttpClient(url + "/v1/chat/completions", verbose, ssl_options)
{
}
}

Expand Down Expand Up @@ -149,8 +150,7 @@ ChatCompletionClient::ResponseHandler(
Error
ChatCompletionClient::AsyncInfer(
std::function<void(InferResult*)> callback,
std::string& serialized_request_body,
const std::string& request_id)
std::string& serialized_request_body, const std::string& request_id)
{
if (callback == nullptr) {
return Error(
Expand All @@ -167,17 +167,17 @@ ChatCompletionClient::AsyncInfer(
UpdateInferStat(request->timer_);
};
std::unique_ptr<HttpRequest> request(new ChatCompletionRequest(
std::move(completion_callback), std::move(callback), request_id, verbose_));
std::move(completion_callback), std::move(callback), request_id,
verbose_));
auto raw_request = static_cast<ChatCompletionRequest*>(request.get());
raw_request->timer_.CaptureTimestamp(
triton::client::RequestTimers::Kind::REQUEST_START);
request->AddInput(
reinterpret_cast<uint8_t*>(serialized_request_body.data()),
serialized_request_body.size());
std::string request_uri(url_ + "/v1/chat/completions");

CURL* multi_easy_handle = curl_easy_init();
Error err = PreRunProcessing(multi_easy_handle, request_uri, raw_request);
Error err = PreRunProcessing(multi_easy_handle, raw_request);
if (!err.IsOk()) {
curl_easy_cleanup(multi_easy_handle);
return err;
Expand All @@ -191,9 +191,9 @@ ChatCompletionClient::AsyncInfer(

Error
ChatCompletionClient::PreRunProcessing(
CURL* curl, std::string& request_uri, ChatCompletionRequest* request)
CURL* curl, ChatCompletionRequest* request)
{
curl_easy_setopt(curl, CURLOPT_URL, request_uri.c_str());
curl_easy_setopt(curl, CURLOPT_URL, url_.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
Expand Down
11 changes: 3 additions & 8 deletions src/c++/perf_analyzer/client_backend/openai/openai_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ class ChatCompletionRequest : public HttpRequest {
ChatCompletionRequest(
std::function<void(HttpRequest*)>&& completion_callback,
std::function<void(InferResult*)>&& response_callback,
const std::string& request_id,
const bool verbose = false)
const std::string& request_id, const bool verbose = false)
: HttpRequest(std::move(completion_callback), verbose),
response_callback_(std::move(response_callback)),
request_id_(request_id)
Expand All @@ -137,7 +136,6 @@ class ChatCompletionClient : public HttpClient {
virtual ~ChatCompletionClient() = default;

/// Create a client that can be used to communicate with the server.
/// \param client Returns a new InferenceServerHttpClient object.
/// \param server_url The inference server name, port, optional
/// scheme and optional base path in the following format:
/// <scheme://>host:port/<base-path>.
Expand All @@ -149,7 +147,6 @@ class ChatCompletionClient : public HttpClient {
/// The use of SSL/TLS depends entirely on the server endpoint.
/// These options will be ignored if the server_url does not
/// expose `https://` scheme.
/// \return Error object indicating success or failure.
ChatCompletionClient(
const std::string& server_url, bool verbose = false,
const HttpSslOptions& ssl_options = HttpSslOptions());
Expand All @@ -159,8 +156,7 @@ class ChatCompletionClient : public HttpClient {
/// with an OpenAI-compatible server in both streaming and non-streaming case.
Error AsyncInfer(
std::function<void(InferResult*)> callback,
std::string& serialized_request_body,
const std::string& request_id);
std::string& serialized_request_body, const std::string& request_id);

const InferStat& ClientInferStat() { return infer_stat_; }

Expand All @@ -169,8 +165,7 @@ class ChatCompletionClient : public HttpClient {

private:
// setup curl handle
Error PreRunProcessing(
CURL* curl, std::string& request_uri, ChatCompletionRequest* request);
Error PreRunProcessing(CURL* curl, ChatCompletionRequest* request);

static size_t ResponseHandler(
void* contents, size_t size, size_t nmemb, void* userp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ OpenAiClientBackend::AsyncInfer(

auto raw_input = dynamic_cast<OpenAiInferInput*>(inputs[0]);
raw_input->PrepareForRequest();
RETURN_IF_CB_ERROR(
http_client_->AsyncInfer(callback, raw_input->DataString(), options.request_id_));
RETURN_IF_CB_ERROR(http_client_->AsyncInfer(
callback, raw_input->DataString(), options.request_id_));
return Error::Success;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ class OpenAiInferInput : public InferInput {
Error Reset() override;
/// See InferInput::AppendRaw()
Error AppendRaw(const uint8_t* input, size_t input_byte_size) override;
/// Resets the heads to start providing data from the beginning.
/// Prepare the input to be in the form expected by an OpenAI client;
/// must be called before accessing the data.
Error PrepareForRequest();
/// Get the next chunk of data if available.
/// Get the contiguous data as a string.
std::string& DataString() { return data_str_; }

private:
Expand Down

0 comments on commit df4464f

Please sign in to comment.