OpenAI client backend + cmake
oandreeva-nv committed Feb 28, 2024
1 parent 3105ec2 commit 84b0aff
Showing 8 changed files with 466 additions and 5 deletions.
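For context, the new option plugs into the existing perf_analyzer option set; per the guards added below, enabling it at configure time should look roughly like this (the source path is a placeholder, and any other flags you need are unchanged by this commit):

cmake -DTRITON_ENABLE_PERF_ANALYZER=ON \
      -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
      /path/to/client

Setting TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON without TRITON_ENABLE_PERF_ANALYZER=ON aborts configuration with the FATAL_ERROR added in this commit, mirroring the existing TFS/TS guards.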
8 changes: 7 additions & 1 deletion CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -45,6 +45,7 @@ option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
+option(TRITON_ENABLE_PERF_ANALYZER_OPENAI "Enable OpenAI support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
@@ -142,6 +143,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
+if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_OPENAI})
+  message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
+endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_OPENAI

ExternalProject_Add(cc-clients
PREFIX cc-clients
@@ -167,6 +171,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
+-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
@@ -209,6 +214,7 @@ if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
+-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
9 changes: 8 additions & 1 deletion src/c++/perf_analyzer/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -170,6 +170,13 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+  target_compile_definitions(
+    client-backend-library
+    PUBLIC TRITON_ENABLE_PERF_ANALYZER_OPENAI=1
+  )
+endif()
+
install(
TARGETS perf_analyzer
RUNTIME DESTINATION bin
15 changes: 14 additions & 1 deletion src/c++/perf_analyzer/client_backend/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -43,6 +43,10 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
add_subdirectory(torchserve)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+  add_subdirectory(openai)
+endif()
+
set(
CLIENT_BACKEND_SRCS
client_backend.cc
@@ -71,6 +75,12 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
set(TS_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:ts-client-backend-library,INCLUDE_DIRECTORIES>)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+  set(OPENAI_LIBRARY $<TARGET_OBJECTS:openai-client-backend-library>)
+  set(OPENAI_TARGET_LINK_LIBRARY PUBLIC $<TARGET_PROPERTY:openai-client-backend-library,LINK_LIBRARIES>)
+  set(OPENAI_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:openai-client-backend-library,INCLUDE_DIRECTORIES>)
+endif()
+
add_library(
client-backend-library
${CLIENT_BACKEND_SRCS}
@@ -80,6 +90,7 @@ add_library(
${CAPI_LIBRARY}
${TFS_LIBRARY}
${TS_LIBRARY}
+${OPENAI_LIBRARY}
)

target_link_libraries(
@@ -89,6 +100,7 @@ target_link_libraries(
${CAPI_TARGET_LINK_LIBRARY}
${TFS_TARGET_LINK_LIBRARY}
${TS_TARGET_LINK_LIBRARY}
+${OPENAI_TARGET_LINK_LIBRARY}
)

target_include_directories(
@@ -97,4 +109,5 @@ target_include_directories(
${CAPI_TARGET_INCLUDE_DIRECTORY}
${TFS_TARGET_INCLUDE_DIRECTORY}
${TS_TARGET_INCLUDE_DIRECTORY}
+${OPENAI_TARGET_INCLUDE_DIRECTORY}
)
5 changes: 3 additions & 2 deletions src/c++/perf_analyzer/client_backend/client_backend.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -135,7 +135,8 @@ enum BackendKind {
TRITON = 0,
TENSORFLOW_SERVING = 1,
TORCHSERVE = 2,
-TRITON_C_API = 3
+TRITON_C_API = 3,
+OPENAI = 4
};
enum ProtocolType { HTTP = 0, GRPC = 1, UNKNOWN = 2 };
enum GrpcCompressionAlgorithm {
56 changes: 56 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt
@@ -0,0 +1,56 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required (VERSION 3.18)

set(
OPENAI_CLIENT_BACKEND_SRCS
openai_client_backend.cc
)

set(
OPENAI_CLIENT_BACKEND_HDRS
openai_client_backend.h
openai_http_client.h
)

add_library(
openai-client-backend-library EXCLUDE_FROM_ALL OBJECT
${OPENAI_CLIENT_BACKEND_SRCS}
${OPENAI_CLIENT_BACKEND_HDRS}
)

target_link_libraries(
openai-client-backend-library
# TODO: Assuming we'll need curl libs
PUBLIC CURL::libcurl
PUBLIC httpclient_static
)

if(${TRITON_ENABLE_GPU})
target_include_directories(openai-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS})
target_link_libraries(openai-client-backend-library PRIVATE ${CUDA_LIBRARIES})
endif() # TRITON_ENABLE_GPU
154 changes: 154 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc
@@ -0,0 +1,154 @@
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "openai_client_backend.h"

namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {

//==============================================================================

Error
OpenAiClientBackend::Create(
const std::string& url, const ProtocolType protocol,
std::shared_ptr<Headers> http_headers, const bool verbose,
std::unique_ptr<ClientBackend>* client_backend)
{
if (protocol == ProtocolType::GRPC) {
return Error(
"perf_analyzer does not support gRPC protocol with OpenAI endpoints");
}
std::unique_ptr<OpenAiClientBackend> openai_client_backend(
new OpenAiClientBackend(http_headers));

// TODO: Adjust as needed
RETURN_IF_CB_ERROR(HttpClient::Create(
&(openai_client_backend->http_client_), url, verbose));

*client_backend = std::move(openai_client_backend);

return Error::Success;
}

Error
OpenAiClientBackend::AsyncInfer(
OnCompleteFn callback, const InferOptions& options,
const std::vector<InferInput*>& inputs,
const std::vector<const InferRequestedOutput*>& outputs)
{
auto wrapped_callback = [callback](cb::openai::InferResult* client_result) {
cb::InferResult* result = new OpenAiInferResult(client_result);
callback(result);
};

// TODO: make an async infer call
//RETURN_IF_CB_ERROR(http_client_->AsyncInfer(...));

return Error::Success;
}


Error
OpenAiClientBackend::ClientInferStat(InferStat* infer_stat)
{
// Reusing the common library utilities to collect and report the
// client side statistics.
tc::InferStat client_infer_stat;

RETURN_IF_TRITON_ERROR(http_client_->ClientInferStat(&client_infer_stat));

ParseInferStat(client_infer_stat, infer_stat);

return Error::Success;
}

void
OpenAiClientBackend::ParseInferStat(
const tc::InferStat& tfserve_infer_stat, InferStat* infer_stat)
{
// TODO: Implement
return;
}

//==============================================================================

Error
OpenAiInferRequestedOutput::Create(
InferRequestedOutput** infer_output, const std::string& name)
{
OpenAiInferRequestedOutput* local_infer_output =
new OpenAiInferRequestedOutput(name);

tc::InferRequestedOutput* openai_infer_output;
RETURN_IF_TRITON_ERROR(
tc::InferRequestedOutput::Create(&openai_infer_output, name));
local_infer_output->output_.reset(openai_infer_output);

*infer_output = local_infer_output;

return Error::Success;
}

OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(
const std::string& name)
: InferRequestedOutput(BackendKind::OPENAI, name)
{
}

//==============================================================================

OpenAiInferResult::OpenAiInferResult(cb::openai::InferResult* result)
{
result_.reset(result);
}

Error
OpenAiInferResult::Id(std::string* id) const
{
id->clear();
return Error::Success;
}

Error
OpenAiInferResult::RequestStatus() const
{
RETURN_IF_CB_ERROR(result_->RequestStatus());
return Error::Success;
}

Error
OpenAiInferResult::RawData(
const std::string& output_name, const uint8_t** buf,
size_t* byte_size) const
{
return Error(
"Output retrieval is not currently supported for OpenAi client backend");
}

//==============================================================================


}}}} // namespace triton::perfanalyzer::clientbackend::openai
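
For orientation, a minimal caller of the factory above might look like the sketch below. It is illustrative, not part of this commit: it assumes the Headers, ProtocolType, ClientBackend, and Error types from client_backend.h, that Error exposes IsOk()/Message() accessors as elsewhere in this client library, and a hypothetical endpoint URL.

#include <iostream>
#include <memory>

#include "openai_client_backend.h"

namespace cb = triton::perfanalyzer::clientbackend;

int
main()
{
  // Optional extra HTTP headers to send with each request; empty here.
  std::shared_ptr<cb::Headers> headers = std::make_shared<cb::Headers>();

  // Create() rejects gRPC, so HTTP is the only protocol that succeeds.
  std::unique_ptr<cb::ClientBackend> backend;
  cb::Error err = cb::openai::OpenAiClientBackend::Create(
      "localhost:9000" /* hypothetical server */, cb::ProtocolType::HTTP,
      headers, /* verbose */ false, &backend);
  if (!err.IsOk()) {
    std::cerr << err.Message() << std::endl;
    return 1;
  }

  // AsyncInfer() is still a TODO in this commit, so stop after creation.
  return 0;
}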