OpenAI client backend + cmake
oandreeva-nv committed Feb 28, 2024
1 parent 3105ec2 commit 84b0aff
Showing 8 changed files with 466 additions and 5 deletions.
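For context, the new option plugs into the existing perf_analyzer option set; per the guards added below, enabling it at configure time should look roughly like this (the source path is a placeholder, and any other flags you need are unchanged by this commit):

cmake -DTRITON_ENABLE_PERF_ANALYZER=ON \
      -DTRITON_ENABLE_PERF_ANALYZER_OPENAI=ON \
      /path/to/client

Setting TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON without TRITON_ENABLE_PERF_ANALYZER=ON aborts configuration with the FATAL_ERROR added in this commit, mirroring the existing TFS/TS guards.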
8 changes: 7 additions & 1 deletion CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -45,6 +45,7 @@ option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF)
option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF)
+option(TRITON_ENABLE_PERF_ANALYZER_OPENAI "Enable OpenAI support for Performance Analyzer" OFF)
option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF)
option(TRITON_ENABLE_TESTS "Include tests in build" OFF)
option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF)
@@ -142,6 +143,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS})
message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS
+if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_OPENAI})
+  message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON requires TRITON_ENABLE_PERF_ANALYZER=ON")
+endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_OPENAI

ExternalProject_Add(cc-clients
PREFIX cc-clients
@@ -167,6 +171,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
+-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
@@ -209,6 +214,7 @@ if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC)
-DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API}
-DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS}
-DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS}
+-DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI}
-DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES}
-DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS}
-DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU}
9 changes: 8 additions & 1 deletion src/c++/perf_analyzer/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -170,6 +170,13 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+  target_compile_definitions(
+    client-backend-library
+    PUBLIC TRITON_ENABLE_PERF_ANALYZER_OPENAI=1
+  )
+endif()
+
install(
TARGETS perf_analyzer
RUNTIME DESTINATION bin
15 changes: 14 additions & 1 deletion src/c++/perf_analyzer/client_backend/CMakeLists.txt
@@ -1,4 +1,4 @@
-# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
@@ -43,6 +43,10 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
add_subdirectory(torchserve)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+  add_subdirectory(openai)
+endif()
+
set(
CLIENT_BACKEND_SRCS
client_backend.cc
@@ -71,6 +75,12 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS)
set(TS_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:ts-client-backend-library,INCLUDE_DIRECTORIES>)
endif()

+if(TRITON_ENABLE_PERF_ANALYZER_OPENAI)
+  set(OPENAI_LIBRARY $<TARGET_OBJECTS:openai-client-backend-library>)
+  set(OPENAI_TARGET_LINK_LIBRARY PUBLIC $<TARGET_PROPERTY:openai-client-backend-library,LINK_LIBRARIES>)
+  set(OPENAI_TARGET_INCLUDE_DIRECTORY PRIVATE $<TARGET_PROPERTY:openai-client-backend-library,INCLUDE_DIRECTORIES>)
+endif()
+
add_library(
client-backend-library
${CLIENT_BACKEND_SRCS}
@@ -80,6 +90,7 @@ add_library(
${CAPI_LIBRARY}
${TFS_LIBRARY}
${TS_LIBRARY}
+${OPENAI_LIBRARY}
)

target_link_libraries(
@@ -89,6 +100,7 @@ target_link_libraries(
${CAPI_TARGET_LINK_LIBRARY}
${TFS_TARGET_LINK_LIBRARY}
${TS_TARGET_LINK_LIBRARY}
+${OPENAI_TARGET_LINK_LIBRARY}
)

target_include_directories(
@@ -97,4 +109,5 @@ target_include_directories(
${CAPI_TARGET_INCLUDE_DIRECTORY}
${TFS_TARGET_INCLUDE_DIRECTORY}
${TS_TARGET_INCLUDE_DIRECTORY}
+${OPENAI_TARGET_INCLUDE_DIRECTORY}
)
5 changes: 3 additions & 2 deletions src/c++/perf_analyzer/client_backend/client_backend.h
@@ -1,4 +1,4 @@
-// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
@@ -135,7 +135,8 @@ enum BackendKind {
TRITON = 0,
TENSORFLOW_SERVING = 1,
TORCHSERVE = 2,
-TRITON_C_API = 3
+TRITON_C_API = 3,
+OPENAI = 4
};
enum ProtocolType { HTTP = 0, GRPC = 1, UNKNOWN = 2 };
enum GrpcCompressionAlgorithm {
56 changes: 56 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt
@@ -0,0 +1,56 @@
# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

cmake_minimum_required (VERSION 3.18)

set(
OPENAI_CLIENT_BACKEND_SRCS
openai_client_backend.cc
)

set(
OPENAI_CLIENT_BACKEND_HDRS
openai_client_backend.h
openai_http_client.h
)

add_library(
openai-client-backend-library EXCLUDE_FROM_ALL OBJECT
${OPENAI_CLIENT_BACKEND_SRCS}
${OPENAI_CLIENT_BACKEND_HDRS}
)

target_link_libraries(
openai-client-backend-library
# TODO: Assuming we'll need curl libs
PUBLIC CURL::libcurl
PUBLIC httpclient_static
)

if(${TRITON_ENABLE_GPU})
target_include_directories(openai-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS})
target_link_libraries(openai-client-backend-library PRIVATE ${CUDA_LIBRARIES})
endif() # TRITON_ENABLE_GPU
154 changes: 154 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc
@@ -0,0 +1,154 @@
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "openai_client_backend.h"

namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {

//==============================================================================

Error
OpenAiClientBackend::Create(
const std::string& url, const ProtocolType protocol,
std::shared_ptr<Headers> http_headers, const bool verbose,
std::unique_ptr<ClientBackend>* client_backend)
{
if (protocol == ProtocolType::GRPC) {
return Error(
"perf_analyzer does not support gRPC protocol with OpenAI endpoints");
}
std::unique_ptr<OpenAiClientBackend> openai_client_backend(
new OpenAiClientBackend(http_headers));

// TODO: Adjust as needed
RETURN_IF_CB_ERROR(HttpClient::Create(
&(openai_client_backend->http_client_), url, verbose));

*client_backend = std::move(openai_client_backend);

return Error::Success;
}

Error
OpenAiClientBackend::AsyncInfer(
OnCompleteFn callback, const InferOptions& options,
const std::vector<InferInput*>& inputs,
const std::vector<const InferRequestedOutput*>& outputs)
{
auto wrapped_callback = [callback](cb::openai::InferResult* client_result) {
cb::InferResult* result = new OpenAiInferResult(client_result);
callback(result);
};

// TODO: make an async infer call
//RETURN_IF_CB_ERROR(http_client_->AsyncInfer(...));

return Error::Success;
}


Error
OpenAiClientBackend::ClientInferStat(InferStat* infer_stat)
{
// Reusing the common library utilities to collect and report the
// client side statistics.
tc::InferStat client_infer_stat;

RETURN_IF_TRITON_ERROR(http_client_->ClientInferStat(&client_infer_stat));

ParseInferStat(client_infer_stat, infer_stat);

return Error::Success;
}

void
OpenAiClientBackend::ParseInferStat(
const tc::InferStat& tfserve_infer_stat, InferStat* infer_stat)
{
// TODO: Implement
return;
}

//==============================================================================

Error
OpenAiInferRequestedOutput::Create(
InferRequestedOutput** infer_output, const std::string& name)
{
OpenAiInferRequestedOutput* local_infer_output =
new OpenAiInferRequestedOutput(name);

tc::InferRequestedOutput* openai_infer_output;
RETURN_IF_TRITON_ERROR(
tc::InferRequestedOutput::Create(&openai_infer_output, name));
local_infer_output->output_.reset(openai_infer_output);

*infer_output = local_infer_output;

return Error::Success;
}

OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(
const std::string& name)
: InferRequestedOutput(BackendKind::OPENAI, name)
{
}

//==============================================================================

OpenAiInferResult::OpenAiInferResult(cb::openai::InferResult* result)
{
result_.reset(result);
}

Error
OpenAiInferResult::Id(std::string* id) const
{
id->clear();
return Error::Success;
}

Error
OpenAiInferResult::RequestStatus() const
{
RETURN_IF_CB_ERROR(result_->RequestStatus());
return Error::Success;
}

Error
OpenAiInferResult::RawData(
const std::string& output_name, const uint8_t** buf,
size_t* byte_size) const
{
return Error(
"Output retrieval is not currently supported for OpenAi client backend");
}

//==============================================================================


}}}} // namespace triton::perfanalyzer::clientbackend::openai
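
For orientation, a minimal caller of the factory above might look like the sketch below. It is illustrative, not part of this commit: it assumes the Headers, ProtocolType, ClientBackend, and Error types from client_backend.h, that Error exposes IsOk()/Message() accessors as elsewhere in this client library, and a hypothetical endpoint URL.

#include <iostream>
#include <memory>

#include "openai_client_backend.h"

namespace cb = triton::perfanalyzer::clientbackend;

int
main()
{
  // Optional extra HTTP headers to send with each request; empty here.
  std::shared_ptr<cb::Headers> headers = std::make_shared<cb::Headers>();

  // Create() rejects gRPC, so HTTP is the only protocol that succeeds.
  std::unique_ptr<cb::ClientBackend> backend;
  cb::Error err = cb::openai::OpenAiClientBackend::Create(
      "localhost:9000" /* hypothetical server */, cb::ProtocolType::HTTP,
      headers, /* verbose */ false, &backend);
  if (!err.IsOk()) {
    std::cerr << err.Message() << std::endl;
    return 1;
  }

  // AsyncInfer() is still a TODO in this commit, so stop after creation.
  return 0;
}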