Skip to content

Commit

Permalink
Address comment
Browse files Browse the repository at this point in the history
  • Loading branch information
GuanLuo authored and tgerdesnv committed Mar 4, 2024
1 parent 5add529 commit df4464f
Show file tree
Hide file tree
Showing 5 changed files with 19 additions and 23 deletions.
6 changes: 3 additions & 3 deletions src/c++/perf_analyzer/client_backend/openai/http_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -93,7 +93,7 @@ struct HttpSslOptions {
std::string key;
};

// an HttpRequest object represents the context of a HTTP transaction. currently
// HttpRequest object representing the context of an HTTP transaction. Currently
// it is also designed to be the placeholder for response data, but how the
// response is stored can be revisited later.
// 'completion_callback' doesn't transfer ownership of HttpRequest, caller must
Expand Down Expand Up @@ -126,7 +126,7 @@ class HttpRequest {
size_t total_input_byte_size_{0};

// HTTP response code for the inference request
long http_code_{200};
uint32_t http_code_{200};

std::function<void(HttpRequest*)> completion_callback_{nullptr};

Expand All @@ -137,7 +137,7 @@ class HttpRequest {
protected:
const bool verbose_{false};

// The pointers to the input data.
// Pointers to the input data.
std::deque<std::pair<uint8_t*, size_t>> data_buffers_;
};

Expand Down
16 changes: 8 additions & 8 deletions src/c++/perf_analyzer/client_backend/openai/openai_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -69,7 +69,8 @@ ChatCompletionRequest::SendResponse(bool is_final, bool is_null)

// Construct a client bound to the OpenAI chat-completions endpoint.
// \param url Base server URL; the "/v1/chat/completions" route is appended
//            here once so request-time code can use url_ directly.
// \param verbose Enable verbose logging in the underlying HttpClient.
// \param ssl_options SSL/TLS options forwarded to the HttpClient base.
ChatCompletionClient::ChatCompletionClient(
    const std::string& url, bool verbose, const HttpSslOptions& ssl_options)
    // 'url + "/v1/chat/completions"' already yields a std::string, so the
    // extra std::string(...) temporary the original wrapped it in is redundant.
    : HttpClient(url + "/v1/chat/completions", verbose, ssl_options)
{
}
}

Expand Down Expand Up @@ -149,8 +150,7 @@ ChatCompletionClient::ResponseHandler(
Error
ChatCompletionClient::AsyncInfer(
std::function<void(InferResult*)> callback,
std::string& serialized_request_body,
const std::string& request_id)
std::string& serialized_request_body, const std::string& request_id)
{
if (callback == nullptr) {
return Error(
Expand All @@ -167,17 +167,17 @@ ChatCompletionClient::AsyncInfer(
UpdateInferStat(request->timer_);
};
std::unique_ptr<HttpRequest> request(new ChatCompletionRequest(
std::move(completion_callback), std::move(callback), request_id, verbose_));
std::move(completion_callback), std::move(callback), request_id,
verbose_));
auto raw_request = static_cast<ChatCompletionRequest*>(request.get());
raw_request->timer_.CaptureTimestamp(
triton::client::RequestTimers::Kind::REQUEST_START);
request->AddInput(
reinterpret_cast<uint8_t*>(serialized_request_body.data()),
serialized_request_body.size());
std::string request_uri(url_ + "/v1/chat/completions");

CURL* multi_easy_handle = curl_easy_init();
Error err = PreRunProcessing(multi_easy_handle, request_uri, raw_request);
Error err = PreRunProcessing(multi_easy_handle, raw_request);
if (!err.IsOk()) {
curl_easy_cleanup(multi_easy_handle);
return err;
Expand All @@ -191,9 +191,9 @@ ChatCompletionClient::AsyncInfer(

Error
ChatCompletionClient::PreRunProcessing(
CURL* curl, std::string& request_uri, ChatCompletionRequest* request)
CURL* curl, ChatCompletionRequest* request)
{
curl_easy_setopt(curl, CURLOPT_URL, request_uri.c_str());
curl_easy_setopt(curl, CURLOPT_URL, url_.c_str());
curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0");
curl_easy_setopt(curl, CURLOPT_POST, 1L);
curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L);
Expand Down
11 changes: 3 additions & 8 deletions src/c++/perf_analyzer/client_backend/openai/openai_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -117,8 +117,7 @@ class ChatCompletionRequest : public HttpRequest {
ChatCompletionRequest(
std::function<void(HttpRequest*)>&& completion_callback,
std::function<void(InferResult*)>&& response_callback,
const std::string& request_id,
const bool verbose = false)
const std::string& request_id, const bool verbose = false)
: HttpRequest(std::move(completion_callback), verbose),
response_callback_(std::move(response_callback)),
request_id_(request_id)
Expand All @@ -137,7 +136,6 @@ class ChatCompletionClient : public HttpClient {
virtual ~ChatCompletionClient() = default;

/// Create a client that can be used to communicate with the server.
/// \param client Returns a new InferenceServerHttpClient object.
/// \param server_url The inference server name, port, optional
/// scheme and optional base path in the following format:
/// <scheme://>host:port/<base-path>.
Expand All @@ -149,7 +147,6 @@ class ChatCompletionClient : public HttpClient {
/// The use of SSL/TLS depends entirely on the server endpoint.
/// These options will be ignored if the server_url does not
/// expose `https://` scheme.
/// \return Error object indicating success or failure.
ChatCompletionClient(
const std::string& server_url, bool verbose = false,
const HttpSslOptions& ssl_options = HttpSslOptions());
Expand All @@ -159,8 +156,7 @@ class ChatCompletionClient : public HttpClient {
/// with an OpenAI-compatible server in both streaming and non-streaming case.
Error AsyncInfer(
std::function<void(InferResult*)> callback,
std::string& serialized_request_body,
const std::string& request_id);
std::string& serialized_request_body, const std::string& request_id);

const InferStat& ClientInferStat() { return infer_stat_; }

Expand All @@ -169,8 +165,7 @@ class ChatCompletionClient : public HttpClient {

private:
// setup curl handle
Error PreRunProcessing(
CURL* curl, std::string& request_uri, ChatCompletionRequest* request);
Error PreRunProcessing(CURL* curl, ChatCompletionRequest* request);

static size_t ResponseHandler(
void* contents, size_t size, size_t nmemb, void* userp);
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -66,8 +66,8 @@ OpenAiClientBackend::AsyncInfer(

auto raw_input = dynamic_cast<OpenAiInferInput*>(inputs[0]);
raw_input->PrepareForRequest();
RETURN_IF_CB_ERROR(
http_client_->AsyncInfer(callback, raw_input->DataString(), options.request_id_));
RETURN_IF_CB_ERROR(http_client_->AsyncInfer(
callback, raw_input->DataString(), options.request_id_));
return Error::Success;
}

Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -51,9 +51,10 @@ class OpenAiInferInput : public InferInput {
Error Reset() override;
/// See InferInput::AppendRaw()
Error AppendRaw(const uint8_t* input, size_t input_byte_size) override;
/// Resets the heads to start providing data from the beginning.
/// Prepare the input to be in the form expected by an OpenAI client;
/// must be called before accessing the data.
Error PrepareForRequest();
/// Get the next chunk of data if available.
/// Get the contiguous data as a string.
std::string& DataString() { return data_str_; }

private:
Expand Down

0 comments on commit df4464f

Please sign in to comment.