OpenAI endpoint support #476

Merged: 23 commits, Mar 6, 2024
CMakeLists.txt (8 changes: 7 additions & 1 deletion)
@@ -1,4 +1,4 @@
-# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -45,6 +45,7 @@ option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
+option(TRITON_ENABLE_PERF_ANALYZER_OPENAI "Enable OpenAI support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
@@ -142,6 +143,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
+if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_OPENAI})
+message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
+endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_OPENAI

ExternalProject_Add(cc-clients
PREFIX cc-clients
@@ -167,6 +171,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
+-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
@@ -209,6 +214,7 @@ if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
+-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
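The new option follows the pattern of the existing TFS and TorchServe flags: it is forwarded to both the C++ and Python client sub-builds, and the FATAL_ERROR guard rejects it unless the Performance Analyzer itself is enabled. A configure line exercising it would presumably look like `cmake -DTRITON_ENABLE_PERF_ANALYZER=ON -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON ..` (illustrative, not taken from this diff); passing only the OPENAI flag trips the guard above.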
src/c++/library/common.h (2 changes: 0 additions & 2 deletions)
@@ -25,8 +25,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

-/// \file
-
#include <algorithm>
#include <chrono>
#include <condition_variable>
src/c++/library/grpc_client.h (2 changes: 0 additions & 2 deletions)
@@ -25,8 +25,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

-/// \file
-
#include <grpcpp/grpcpp.h>

#include <queue>
src/c++/library/http_client.h (6 changes: 2 additions & 4 deletions)
@@ -1,4 +1,4 @@
-// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -25,8 +25,6 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

-/// \file
-
#include <map>
#include <memory>

@@ -49,7 +47,7 @@ struct HttpSslOptions {
enum KEYTYPE {
KEY_PEM = 0,
KEY_DER = 1
-// TODO: Support loading private key from crypto engine
+// TODO TMA-1645: Support loading private key from crypto engine
// KEY_ENG = 2
};
explicit HttpSslOptions()
src/c++/perf_analyzer/CMakeLists.txt (9 changes: 8 additions & 1 deletion)
@@ -1,4 +1,4 @@
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -170,6 +170,13 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+target_compile_definitions(
+client-backend-library
+PUBLIC TRITON_ENABLE_PERF_ANALYZER_OPENAI=1
+)
+endif()

install(
TARGETS perf_analyzer
RUNTIME DESTINATION bin
src/c++/perf_analyzer/client_backend/CMakeLists.txt (15 changes: 14 additions & 1 deletion)
@@ -1,4 +1,4 @@
-# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -43,6 +43,10 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
add_subdirectory(torchserve)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+add_subdirectory(openai)
+endif()

set(
CLIENT_BACKEND_SRCS
client_backend.cc
@@ -71,6 +75,12 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
set(TS_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:ts-client-backend-library,INCLUDE_DIRECTORIES>)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+set(OPENAI_LIBRARY $<TARGET_OBJECTS:openai-client-backend-library>)
+set(OPENAI_TARGET_LINK_LIBRARY PUBLIC $<TARGET_PROPERTY:openai-client-backend-library,LINK_LIBRARIES>)
+set(OPENAI_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:openai-client-backend-library,INCLUDE_DIRECTORIES>)
+endif()

add_library(
client-backend-library
${CLIENT_BACKEND_SRCS}
@@ -80,6 +90,7 @@ add_library(
${CAPI_LIBRARY}
${TFS_LIBRARY}
${TS_LIBRARY}
+${OPENAI_LIBRARY}
)

target_link_libraries(
@@ -89,6 +100,7 @@ target_link_libraries(
${CAPI_TARGET_LINK_LIBRARY}
${TFS_TARGET_LINK_LIBRARY}
${TS_TARGET_LINK_LIBRARY}
+${OPENAI_TARGET_LINK_LIBRARY}
)

target_include_directories(
@@ -97,4 +109,5 @@ target_include_directories(
${CAPI_TARGET_INCLUDE_DIRECTORY}
${TFS_TARGET_INCLUDE_DIRECTORY}
${TS_TARGET_INCLUDE_DIRECTORY}
+${OPENAI_TARGET_INCLUDE_DIRECTORY}
)
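Note that when TRITON_ENABLE_PERF_ANALYZER_OPENAI is OFF, the OPENAI_LIBRARY, OPENAI_TARGET_LINK_LIBRARY, and OPENAI_TARGET_INCLUDE_DIRECTORY variables are never set and expand to nothing, so the add_library, target_link_libraries, and target_include_directories calls above are unaffected; this mirrors how the CAPI, TFS, and TS variables are already handled.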
src/c++/perf_analyzer/client_backend/client_backend.cc (44 changes: 35 additions & 9 deletions)
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -32,6 +32,10 @@
#include "triton_c_api/triton_c_api_backend.h"
#endif // TRITON_ENABLE_PERF_ANALYZER_C_API

+#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
+#include "openai/openai_client_backend.h"
+#endif  // TRITON_ENABLE_PERF_ANALYZER_OPENAI

#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
#include "tensorflow_serving/tfserve_client_backend.h"
#endif // TRITON_ENABLE_PERF_ANALYZER_TFS
@@ -86,6 +90,9 @@ BackendKindToString(const BackendKind kind)
case TRITON_C_API:
return std::string("TRITON_C_API");
break;
+case OPENAI:
+return std::string("OPENAI");
+break;
default:
return std::string("UNKNOWN");
break;
@@ -112,8 +119,8 @@ BackendToGrpcType(const GrpcCompressionAlgorithm compression_algorithm)
//
Error
ClientBackendFactory::Create(
-const BackendKind kind, const std::string& url, const ProtocolType protocol,
-const SslOptionsBase& ssl_options,
+const BackendKind kind, const std::string& url, const std::string& endpoint,
+const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers,
@@ -124,9 +131,10 @@ ClientBackendFactory::Create(
std::shared_ptr<ClientBackendFactory>* factory)
{
factory->reset(new ClientBackendFactory(
-kind, url, protocol, ssl_options, trace_options, compression_algorithm,
-http_headers, triton_server_path, model_repository_path, verbose,
-metrics_url, input_tensor_format, output_tensor_format));
+kind, url, endpoint, protocol, ssl_options, trace_options,
+compression_algorithm, http_headers, triton_server_path,
+model_repository_path, verbose, metrics_url, input_tensor_format,
+output_tensor_format));
return Error::Success;
}

@@ -135,7 +143,7 @@ ClientBackendFactory::CreateClientBackend(
std::unique_ptr<ClientBackend>* client_backend)
{
RETURN_IF_CB_ERROR(ClientBackend::Create(
-kind_, url_, protocol_, ssl_options_, trace_options_,
+kind_, url_, endpoint_, protocol_, ssl_options_, trace_options_,
compression_algorithm_, http_headers_, verbose_, triton_server_path,
model_repository_path_, metrics_url_, input_tensor_format_,
output_tensor_format_, client_backend));
@@ -153,8 +161,8 @@ ClientBackendFactory::Kind()
//
Error
ClientBackend::Create(
-const BackendKind kind, const std::string& url, const ProtocolType protocol,
-const SslOptionsBase& ssl_options,
+const BackendKind kind, const std::string& url, const std::string& endpoint,
+const ProtocolType protocol, const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers, const bool verbose,
Expand All @@ -172,6 +180,12 @@ ClientBackend::Create(
metrics_url, input_tensor_format, output_tensor_format,
&local_backend));
}
+#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
+else if (kind == OPENAI) {
+RETURN_IF_CB_ERROR(openai::OpenAiClientBackend::Create(
+url, endpoint, protocol, http_headers, verbose, &local_backend));
+}
+#endif  // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
else if (kind == TENSORFLOW_SERVING) {
RETURN_IF_CB_ERROR(tfserving::TFServeClientBackend::Create(
@@ -421,6 +435,12 @@ InferInput::Create(
RETURN_IF_CB_ERROR(tritonremote::TritonInferInput::Create(
infer_input, name, dims, datatype));
}
+#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
+else if (kind == OPENAI) {
+RETURN_IF_CB_ERROR(
+openai::OpenAiInferInput::Create(infer_input, name, dims, datatype));
+}
+#endif  // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
else if (kind == TENSORFLOW_SERVING) {
RETURN_IF_CB_ERROR(tfserving::TFServeInferInput::Create(
@@ -505,6 +525,12 @@ InferRequestedOutput::Create(
RETURN_IF_CB_ERROR(tritonremote::TritonInferRequestedOutput::Create(
infer_output, name, class_count));
}
+#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
+else if (kind == OPENAI) {
+RETURN_IF_CB_ERROR(
+openai::OpenAiInferRequestedOutput::Create(infer_output, name));
+}
+#endif  // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
else if (kind == TENSORFLOW_SERVING) {
RETURN_IF_CB_ERROR(
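Taken together, the factory and dispatch changes mean a caller selects the OpenAI path purely through the new BackendKind value plus the new endpoint string. The sketch below shows what such a call site might look like; it is illustrative only: the namespace, include path, COMPRESS_NONE, TensorFormat::BINARY, and Error::IsOk() are assumptions about the surrounding codebase, not values taken from this diff.

```cpp
// Hypothetical call site for the new OPENAI backend kind (sketch only).
// Argument order follows ClientBackendFactory::Create as changed in this PR;
// every concrete value and every name not visible in the diff is assumed.
#include <map>
#include <memory>
#include <string>
#include <vector>

#include "client_backend/client_backend.h"  // assumed include path

using namespace triton::perfanalyzer::clientbackend;  // assumed namespace

std::unique_ptr<ClientBackend> MakeOpenAiBackend()
{
  SslOptionsBase ssl_options;  // default SSL settings
  std::map<std::string, std::vector<std::string>> trace_options;
  auto http_headers = std::make_shared<Headers>();  // no extra HTTP headers

  std::shared_ptr<ClientBackendFactory> factory;
  Error err = ClientBackendFactory::Create(
      OPENAI,                 // BackendKind added by this PR
      "localhost:9000",       // server url (example value)
      "v1/chat/completions",  // the new endpoint argument (example value)
      HTTP, ssl_options, trace_options,
      COMPRESS_NONE,          // assumed GrpcCompressionAlgorithm member
      http_headers,
      /*triton_server_path=*/"", /*model_repository_path=*/"",
      /*verbose=*/false, /*metrics_url=*/"",
      TensorFormat::BINARY, TensorFormat::BINARY,  // assumed enum values
      &factory);
  if (!err.IsOk()) {
    return nullptr;  // Error::IsOk() assumed from the Triton client Error type
  }

  std::unique_ptr<ClientBackend> backend;
  err = factory->CreateClientBackend(&backend);
  if (!err.IsOk()) {
    return nullptr;
  }
  return backend;
}
```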
src/c++/perf_analyzer/client_backend/client_backend.h (20 changes: 13 additions & 7 deletions)
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -135,7 +135,8 @@ enum BackendKind {
TRITON = 0,
TENSORFLOW_SERVING = 1,
TORCHSERVE = 2,
-TRITON_C_API = 3
+TRITON_C_API = 3,
+OPENAI = 4
};
enum ProtocolType { HTTP = 0, GRPC = 1, UNKNOWN = 2 };
enum GrpcCompressionAlgorithm {
@@ -267,6 +268,7 @@ class ClientBackendFactory {
/// Create a factory that can be used to construct Client Backends.
/// \param kind The kind of client backend to create.
/// \param url The inference server url and port.
+/// \param endpoint The endpoint on the inference server to send requests to
/// \param protocol The protocol type used.
/// \param ssl_options The SSL options used with client backend.
/// \param compression_algorithm The compression algorithm to be used
@@ -289,7 +291,8 @@
/// \return Error object indicating success or failure.
static Error Create(
const BackendKind kind, const std::string& url,
-const ProtocolType protocol, const SslOptionsBase& ssl_options,
+const std::string& endpoint, const ProtocolType protocol,
+const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers,
@@ -308,16 +311,17 @@
private:
ClientBackendFactory(
const BackendKind kind, const std::string& url,
-const ProtocolType protocol, const SslOptionsBase& ssl_options,
+const std::string& endpoint, const ProtocolType protocol,
+const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
const std::shared_ptr<Headers> http_headers,
const std::string& triton_server_path,
const std::string& model_repository_path, const bool verbose,
const std::string& metrics_url, const TensorFormat input_tensor_format,
const TensorFormat output_tensor_format)
-: kind_(kind), url_(url), protocol_(protocol), ssl_options_(ssl_options),
-trace_options_(trace_options),
+: kind_(kind), url_(url), endpoint_(endpoint), protocol_(protocol),
+ssl_options_(ssl_options), trace_options_(trace_options),
compression_algorithm_(compression_algorithm),
http_headers_(http_headers), triton_server_path(triton_server_path),
model_repository_path_(model_repository_path), verbose_(verbose),
@@ -328,6 +332,7 @@

const BackendKind kind_;
const std::string url_;
const std::string endpoint_;
const ProtocolType protocol_;
const SslOptionsBase& ssl_options_;
const std::map<std::string, std::vector<std::string>> trace_options_;
@@ -360,7 +365,8 @@ class ClientBackend {
public:
static Error Create(
const BackendKind kind, const std::string& url,
-const ProtocolType protocol, const SslOptionsBase& ssl_options,
+const std::string& endpoint, const ProtocolType protocol,
+const SslOptionsBase& ssl_options,
const std::map<std::string, std::vector<std::string>> trace_options,
const GrpcCompressionAlgorithm compression_algorithm,
std::shared_ptr<Headers> http_headers, const bool verbose,
src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt (new file, 60 additions)
@@ -0,0 +1,60 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required (VERSION 3.18)

set(
OPENAI_CLIENT_BACKEND_SRCS
http_client.cc
openai_client_backend.cc
openai_client.cc
openai_infer_input.cc
)

set(
OPENAI_CLIENT_BACKEND_HDRS
http_client.h
openai_client_backend.h
openai_client.h
openai_infer_input.h
)

add_library(
openai-client-backend-library EXCLUDE_FROM_ALL OBJECT
${OPENAI_CLIENT_BACKEND_SRCS}
${OPENAI_CLIENT_BACKEND_HDRS}
)

target_link_libraries(
openai-client-backend-library
PUBLIC CURL::libcurl
PUBLIC httpclient_static
)

if(${TRITON_ENABLE_GPU})
target_include_directories(openai-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS})
target_link_libraries(openai-client-backend-library PRIVATE ${CUDA_LIBRARIES})
endif() # TRITON_ENABLE_GPU
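Like the tfserve and torchserve backends, the new openai backend is compiled as an EXCLUDE_FROM_ALL OBJECT library; its objects, link libraries (libcurl plus the static httpclient), and include directories are then folded into client-backend-library through the $<TARGET_OBJECTS:...> and $<TARGET_PROPERTY:...> generator expressions in the parent client_backend/CMakeLists.txt shown earlier.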