Skip to content

Commit

Permalink
OpenAI endpoint support (#476)
Browse files Browse the repository at this point in the history
* Add openai service-kind and add endpoint to CLI

* Add openai to model parser

* OpenAI client backend + cmake

* Create OpenAI backend

* New JSON datatype for PA. Show json data available at http_client level

* Add an output to OpenAI models

* Add OpenAI client (#482)

* Add OpenAI client

* Address comment

* Pass endpoint to openai client

* Resolve fixmes

* update copyright years

* more cleanup

* Update src/c++/perf_analyzer/command_line_parser.cc

Co-authored-by: dyastremsky <[email protected]>

* remove 'file' from top of files

* clean up help message and add endpoint to help

* Fix client stats

* remove unused fn

* Assert on json input format

* Use a single SEND_END point

* Add sync assert. Add OPENAI to helper fn

* remove unused typedef

* Add batch size assert

* Address comment (#487)

* Address comment

* Update src/c++/perf_analyzer/client_backend/openai/openai_client.cc

* Update src/c++/perf_analyzer/client_backend/openai/http_client.cc

* formatting

---------

Co-authored-by: Timothy Gerdes <[email protected]>
Co-authored-by: tgerdes <[email protected]>

* Make copy of exported data so it isn't corrupted (#488)

---------

Co-authored-by: oandreeva-nv <[email protected]>
Co-authored-by: GuanLuo <[email protected]>
Co-authored-by: dyastremsky <[email protected]>
  • Loading branch information
4 people authored Mar 6, 2024
1 parent 6225a3f commit a11ffa2
Show file tree
Hide file tree
Showing 28 changed files with 1,635 additions and 78 deletions.
8 changes: 7 additions & 1 deletion CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -45,6 +45,7 @@ option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_OPENAI "Enable OpenAI support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
Expand Down Expand Up @@ -142,6 +143,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_OPENAI})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_OPENAI

ExternalProject_Add(cc-clients
PREFIX cc-clients
Expand All @@ -167,6 +171,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
Expand Down Expand Up @@ -209,6 +214,7 @@ if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
Expand Down
2 changes: 0 additions & 2 deletions src/c++/library/common.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

/// \file

#include <algorithm>
#include <chrono>
#include <condition_variable>
Expand Down
2 changes: 0 additions & 2 deletions src/c++/library/grpc_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -25,8 +25,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

/// \file

#include <grpcpp/grpcpp.h>

#include <queue>
Expand Down
6 changes: 2 additions & 4 deletions src/c++/library/http_client.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand All @@ -25,8 +25,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

/// \file

#include <map>
#include <memory>

Expand All @@ -49,7 +47,7 @@ struct HttpSslOptions {
enum KEYTYPE {
KEY_PEM = 0,
KEY_DER = 1
// TODO: Support loading private key from crypto engine
// TODO TMA-1645: Support loading private key from crypto engine
// KEY_ENG = 2
};
explicit HttpSslOptions()
Expand Down
9 changes: 8 additions & 1 deletion src/c++/perf_analyzer/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -170,6 +170,13 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
)
endif()

if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
target_compile_definitions(
client-backend-library
PUBLIC TRITON_ENABLE_PERF_ANALYZER_OPENAI=1
)
endif()

install(
TARGETS perf_analyzer
RUNTIME DESTINATION bin
Expand Down
15 changes: 14 additions & 1 deletion src/c++/perf_analyzer/client_backend/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -43,6 +43,10 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
add_subdirectory(torchserve)
endif()

if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
add_subdirectory(openai)
endif()

set(
CLIENT_BACKEND_SRCS
client_backend.cc
Expand Down Expand Up @@ -71,6 +75,12 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
set(TS_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:ts-client-backend-library,INCLUDE_DIRECTORIES>)
endif()

if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
set(OPENAI_LIBRARY $<TARGET_OBJECTS:openai-client-backend-library>)
set(OPENAI_TARGET_LINK_LIBRARY PUBLIC $<TARGET_PROPERTY:openai-client-backend-library,LINK_LIBRARIES>)
set(OPENAI_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:openai-client-backend-library,INCLUDE_DIRECTORIES>)
endif()

add_library(
client-backend-library
${CLIENT_BACKEND_SRCS}
Expand All @@ -80,6 +90,7 @@ add_library(
${CAPI_LIBRARY}
${TFS_LIBRARY}
${TS_LIBRARY}
${OPENAI_LIBRARY}
)

target_link_libraries(
Expand All @@ -89,6 +100,7 @@ target_link_libraries(
${CAPI_TARGET_LINK_LIBRARY}
${TFS_TARGET_LINK_LIBRARY}
${TS_TARGET_LINK_LIBRARY}
${OPENAI_TARGET_LINK_LIBRARY}
)

target_include_directories(
Expand All @@ -97,4 +109,5 @@ target_include_directories(
${CAPI_TARGET_INCLUDE_DIRECTORY}
${TFS_TARGET_INCLUDE_DIRECTORY}
${TS_TARGET_INCLUDE_DIRECTORY}
${OPENAI_TARGET_INCLUDE_DIRECTORY}
)
44 changes: 35 additions & 9 deletions src/c++/perf_analyzer/client_backend/client_backend.cc
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -32,6 +32,10 @@
#include "triton_c_api/triton_c_api_backend.h"
#endif // TRITON_ENABLE_PERF_ANALYZER_C_API

#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
#include "openai/openai_client_backend.h"
#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI

#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
#include "tensorflow_serving/tfserve_client_backend.h"
#endif // TRITON_ENABLE_PERF_ANALYZER_TFS
Expand Down Expand Up @@ -86,6 +90,9 @@ BackendKindToString(const BackendKind kind)
case TRITON_C_API:
return std::string("TRITON_C_API");
break;
case OPENAI:
return std::string("OPENAI");
break;
default:
return std::string("UNKNOWN");
break;
Expand All @@ -112,8 +119,8 @@ BackendToGrpcType(const GrpcCompressionAlgorithm compression_algorithm)
//
Error
ClientBackendFactory::Create(
const BackendKind kind, const std::string& url, const ProtocolType protocol,
const SslOptionsBase& ssl_options,
const BackendKind kind, const std::string& url, const std::string& endpoint,
const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers,
Expand All @@ -124,9 +131,10 @@ ClientBackendFactory::Create(
std::shared_ptr<ClientBackendFactory>* factory)
{
factory->reset(new ClientBackendFactory(
kind, url, protocol, ssl_options, trace_options, compression_algorithm,
http_headers, triton_server_path, model_repository_path, verbose,
metrics_url, input_tensor_format, output_tensor_format));
kind, url, endpoint, protocol, ssl_options, trace_options,
compression_algorithm, http_headers, triton_server_path,
model_repository_path, verbose, metrics_url, input_tensor_format,
output_tensor_format));
return Error::Success;
}

Expand All @@ -135,7 +143,7 @@ ClientBackendFactory::CreateClientBackend(
std::unique_ptr<ClientBackend>* client_backend)
{
RETURN_IF_CB_ERROR(ClientBackend::Create(
kind_, url_, protocol_, ssl_options_, trace_options_,
kind_, url_, endpoint_, protocol_, ssl_options_, trace_options_,
compression_algorithm_, http_headers_, verbose_, triton_server_path,
model_repository_path_, metrics_url_, input_tensor_format_,
output_tensor_format_, client_backend));
Expand All @@ -153,8 +161,8 @@ ClientBackendFactory::Kind()
//
Error
ClientBackend::Create(
const BackendKind kind, const std::string& url, const ProtocolType protocol,
const SslOptionsBase& ssl_options,
const BackendKind kind, const std::string& url, const std::string& endpoint,
const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers, const bool verbose,
Expand All @@ -172,6 +180,12 @@ ClientBackend::Create(
metrics_url, input_tensor_format, output_tensor_format,
&local_backend));
}
#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
else if (kind == OPENAI) {
RETURN_IF_CB_ERROR(openai::OpenAiClientBackend::Create(
url, endpoint, protocol, http_headers, verbose, &local_backend));
}
#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
else if (kind == TENSORFLOW_SERVING) {
RETURN_IF_CB_ERROR(tfserving::TFServeClientBackend::Create(
Expand Down Expand Up @@ -421,6 +435,12 @@ InferInput::Create(
RETURN_IF_CB_ERROR(tritonremote::TritonInferInput::Create(
infer_input, name, dims, datatype));
}
#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
else if (kind == OPENAI) {
RETURN_IF_CB_ERROR(
openai::OpenAiInferInput::Create(infer_input, name, dims, datatype));
}
#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
else if (kind == TENSORFLOW_SERVING) {
RETURN_IF_CB_ERROR(tfserving::TFServeInferInput::Create(
Expand Down Expand Up @@ -505,6 +525,12 @@ InferRequestedOutput::Create(
RETURN_IF_CB_ERROR(tritonremote::TritonInferRequestedOutput::Create(
infer_output, name, class_count));
}
#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
else if (kind == OPENAI) {
RETURN_IF_CB_ERROR(
openai::OpenAiInferRequestedOutput::Create(infer_output, name));
}
#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
else if (kind == TENSORFLOW_SERVING) {
RETURN_IF_CB_ERROR(
Expand Down
20 changes: 13 additions & 7 deletions src/c++/perf_analyzer/client_backend/client_backend.h
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
Expand Down Expand Up @@ -135,7 +135,8 @@ enum BackendKind {
TRITON = 0,
TENSORFLOW_SERVING = 1,
TORCHSERVE = 2,
TRITON_C_API = 3
TRITON_C_API = 3,
OPENAI = 4
};
enum ProtocolType { HTTP = 0, GRPC = 1, UNKNOWN = 2 };
enum GrpcCompressionAlgorithm {
Expand Down Expand Up @@ -267,6 +268,7 @@ class ClientBackendFactory {
/// Create a factory that can be used to construct Client Backends.
/// \param kind The kind of client backend to create.
/// \param url The inference server url and port.
/// \param endpoint The endpoint on the inference server to send requests to
/// \param protocol The protocol type used.
/// \param ssl_options The SSL options used with client backend.
/// \param compression_algorithm The compression algorithm to be used
Expand All @@ -289,7 +291,8 @@ class ClientBackendFactory {
/// \return Error object indicating success or failure.
static Error Create(
const BackendKind kind, const std::string& url,
const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::string& endpoint, const ProtocolType protocol,
const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers,
Expand All @@ -308,16 +311,17 @@ class ClientBackendFactory {
private:
ClientBackendFactory(
const BackendKind kind, const std::string& url,
const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::string& endpoint, const ProtocolType protocol,
const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
const std::shared_ptr<Headers> http_headers,
const std::string& triton_server_path,
const std::string& model_repository_path, const bool verbose,
const std::string& metrics_url, const TensorFormat input_tensor_format,
const TensorFormat output_tensor_format)
: kind_(kind), url_(url), protocol_(protocol), ssl_options_(ssl_options),
trace_options_(trace_options),
: kind_(kind), url_(url), endpoint_(endpoint), protocol_(protocol),
ssl_options_(ssl_options), trace_options_(trace_options),
compression_algorithm_(compression_algorithm),
http_headers_(http_headers), triton_server_path(triton_server_path),
model_repository_path_(model_repository_path), verbose_(verbose),
Expand All @@ -328,6 +332,7 @@ class ClientBackendFactory {

const BackendKind kind_;
const std::string url_;
const std::string endpoint_;
const ProtocolType protocol_;
const SslOptionsBase& ssl_options_;
const std::map<std::string, std::vector<std::string>> trace_options_;
Expand Down Expand Up @@ -360,7 +365,8 @@ class ClientBackend {
public:
static Error Create(
const BackendKind kind, const std::string& url,
const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::string& endpoint, const ProtocolType protocol,
const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers, const bool verbose,
Expand Down
60 changes: 60 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
#  * Redistributions of source code must retain the above copyright
#    notice, this list of conditions and the following disclaimer.
#  * Redistributions in binary form must reproduce the above copyright
#    notice, this list of conditions and the following disclaimer in the
#    documentation and/or other materials provided with the distribution.
#  * Neither the name of NVIDIA CORPORATION nor the names of its
#    contributors may be used to endorse or promote products derived
#    from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required (VERSION 3.18)

# Sources and headers for the OpenAI client backend used by Perf Analyzer.
set(
  OPENAI_CLIENT_BACKEND_SRCS
  http_client.cc
  openai_client_backend.cc
  openai_client.cc
  openai_infer_input.cc
)

set(
  OPENAI_CLIENT_BACKEND_HDRS
  http_client.h
  openai_client_backend.h
  openai_client.h
  openai_infer_input.h
)

# OBJECT library: its objects are folded directly into the parent
# client-backend-library target. EXCLUDE_FROM_ALL keeps it out of the
# default build so it is only compiled when this subdirectory is added
# (i.e. when TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON).
add_library(
  openai-client-backend-library EXCLUDE_FROM_ALL OBJECT
  ${OPENAI_CLIENT_BACKEND_SRCS}
  ${OPENAI_CLIENT_BACKEND_HDRS}
)

# PUBLIC so consumers of the object library inherit the curl and
# httpclient usage requirements (the parent CMakeLists reads the
# LINK_LIBRARIES property of this target).
target_link_libraries(
  openai-client-backend-library
  PUBLIC CURL::libcurl
  PUBLIC httpclient_static
)

# Test the variable by name. The previous form `if(${TRITON_ENABLE_GPU})`
# expands the variable first: when it is undefined/empty this becomes the
# malformed `if()` and configuration fails, and if its value happens to
# name another variable it is dereferenced a second time (CMP0054 footgun).
if(TRITON_ENABLE_GPU)
  target_include_directories(openai-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS})
  target_link_libraries(openai-client-backend-library PRIVATE ${CUDA_LIBRARIES})
endif() # TRITON_ENABLE_GPU
Loading

0 comments on commit a11ffa2

Please sign in to comment.