From fde8dd50a4890118e2a173a3d7830a3ead914a0c Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 28 Feb 2024 08:54:53 -0600 Subject: [PATCH 01/23] Add openai service-kind and add endpoint to CLI --- .../client_backend/client_backend.h | 3 ++- src/c++/perf_analyzer/command_line_parser.cc | 17 +++++++++++++++++ src/c++/perf_analyzer/command_line_parser.h | 1 + .../perf_analyzer/test_command_line_parser.cc | 1 + 4 files changed, 21 insertions(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.h b/src/c++/perf_analyzer/client_backend/client_backend.h index 870ea3dd5..988957e98 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.h +++ b/src/c++/perf_analyzer/client_backend/client_backend.h @@ -135,7 +135,8 @@ enum BackendKind { TRITON = 0, TENSORFLOW_SERVING = 1, TORCHSERVE = 2, - TRITON_C_API = 3 + TRITON_C_API = 3, + OPENAI = 4 }; enum ProtocolType { HTTP = 0, GRPC = 1, UNKNOWN = 2 }; enum GrpcCompressionAlgorithm { diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 711f1714e..9bcc5d46f 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -875,6 +875,7 @@ CLParser::ParseCommandLine(int argc, char** argv) {"periodic-concurrency-range", required_argument, 0, 59}, {"request-period", required_argument, 0, 60}, {"request-parameter", required_argument, 0, 61}, + {"endpoint", required_argument, 0, 62}, {0, 0, 0, 0}}; // Parse commandline... @@ -1169,6 +1170,8 @@ CLParser::ParseCommandLine(int argc, char** argv) params_->kind = cb::TORCHSERVE; } else if (arg.compare("triton_c_api") == 0) { params_->kind = cb::TRITON_C_API; + } else if (arg.compare("openai") == 0) { + params_->kind = cb::OPENAI; } else { Usage( "Failed to parse --service-kind. Unsupported type provided: '" + @@ -1608,6 +1611,9 @@ CLParser::ParseCommandLine(int argc, char** argv) params_->request_parameters[name] = param; break; } + case 62: { + params_->endpoint = optarg; + } case 'v': params_->extra_verbose = params_->verbose; params_->verbose = true; @@ -1909,6 +1915,17 @@ CLParser::VerifyOptions() params_->protocol = cb::ProtocolType::UNKNOWN; } + if (params_->kind == cb::BackendKind::OPENAI) { + if (params_->user_data.empty()) { + Usage("Must supply --input-data for OpenAI service kind."); + } + if (params_->endpoint.empty()) { + Usage( + "Must supply --endpoint for OpenAI service kind. 
For example, " + "\"v1/chat/completions\""); + } + } + if (params_->should_collect_metrics && params_->kind != cb::BackendKind::TRITON) { Usage( diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index 9ff4869ff..79d387811 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -100,6 +100,7 @@ struct PerfAnalyzerParameters { bool dynamic_concurrency_mode = false; bool url_specified = false; std::string url{"localhost:8000"}; + std::string endpoint{""}; std::string model_name; std::string model_version; uint64_t batch_size = 1; diff --git a/src/c++/perf_analyzer/test_command_line_parser.cc b/src/c++/perf_analyzer/test_command_line_parser.cc index fd0d8af16..6428a0f2f 100644 --- a/src/c++/perf_analyzer/test_command_line_parser.cc +++ b/src/c++/perf_analyzer/test_command_line_parser.cc @@ -263,6 +263,7 @@ TEST_CASE("Testing PerfAnalyzerParameters") CHECK(params->sequence_length == 20); CHECK(params->percentile == -1); CHECK(params->user_data.size() == 0); + CHECK_STRING("endpoint", params->endpoint, ""); CHECK(params->input_shapes.size() == 0); CHECK(params->measurement_window_ms == 5000); CHECK(params->using_concurrency_range == false); From 269f4f99de9965c5876f8dd56d5eefaa38c5646f Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 28 Feb 2024 09:04:07 -0600 Subject: [PATCH 02/23] Add openai to model parser --- src/c++/perf_analyzer/model_parser.cc | 20 ++++++++++++++++++++ src/c++/perf_analyzer/model_parser.h | 4 ++++ src/c++/perf_analyzer/perf_analyzer.cc | 5 +++++ 3 files changed, 29 insertions(+) diff --git a/src/c++/perf_analyzer/model_parser.cc b/src/c++/perf_analyzer/model_parser.cc index ee7ab5303..7dcd59819 100644 --- a/src/c++/perf_analyzer/model_parser.cc +++ b/src/c++/perf_analyzer/model_parser.cc @@ -265,6 +265,26 @@ ModelParser::InitTFServe( return cb::Error::Success; } +cb::Error +ModelParser::InitOpenAI( + const std::string& model_name, const std::string& model_version, + const int32_t batch_size) +{ + // OpenAI does not return model metadata hence we can not obtain any + // parameters. 
+ model_name_ = model_name; + model_version_ = model_version; + max_batch_size_ = batch_size; + + // OpenAI will take a single json input with a fully formed payload + auto it = inputs_->emplace("payload", ModelTensor()).first; + it->second.name_ = "payload"; + it->second.datatype_ = "JSON"; + it->second.shape_.push_back(1); + + return cb::Error::Success; +} + cb::Error ModelParser::InitTorchServe( const std::string& model_name, const std::string& model_version, diff --git a/src/c++/perf_analyzer/model_parser.h b/src/c++/perf_analyzer/model_parser.h index 4646433ab..c1e16bac7 100644 --- a/src/c++/perf_analyzer/model_parser.h +++ b/src/c++/perf_analyzer/model_parser.h @@ -111,6 +111,10 @@ class ModelParser { const std::unordered_map>& input_shapes, std::unique_ptr& backend); + cb::Error InitOpenAI( + const std::string& model_name, const std::string& model_version, + const int32_t batch_size); + cb::Error InitTorchServe( const std::string& model_name, const std::string& model_version, const int32_t batch_size); diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index 46b665757..a1a5ab635 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -108,6 +108,11 @@ PerfAnalyzer::CreateAnalyzerObjects() model_metadata, model_config, params_->model_version, params_->bls_composing_models, params_->input_shapes, backend_), "failed to create model parser"); + } else if (params_->kind == cb::BackendKind::OPENAI) { + FAIL_IF_ERR( + parser_->InitOpenAI( + params_->model_name, params_->model_version, params_->batch_size), + "failed to create model parser"); } else if (params_->kind == cb::BackendKind::TENSORFLOW_SERVING) { rapidjson::Document model_metadata; FAIL_IF_ERR( From 2342215d642f27458cce89f88366f1dda022ac12 Mon Sep 17 00:00:00 2001 From: oandreeva-nv Date: Wed, 28 Feb 2024 01:34:49 -0800 Subject: [PATCH 03/23] OpenAI client backend + cmake --- CMakeLists.txt | 8 +- src/c++/perf_analyzer/CMakeLists.txt | 9 +- .../client_backend/CMakeLists.txt | 15 +- .../client_backend/client_backend.h | 2 +- .../client_backend/openai/CMakeLists.txt | 56 +++++++ .../openai/openai_client_backend.cc | 154 ++++++++++++++++++ .../openai/openai_client_backend.h | 130 +++++++++++++++ .../openai/openai_http_client.h | 94 +++++++++++ 8 files changed, 464 insertions(+), 4 deletions(-) create mode 100644 src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_http_client.h diff --git a/CMakeLists.txt b/CMakeLists.txt index b1fc6ccf0..97f93ddaf 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2021-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2021-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -45,6 +45,7 @@ option(TRITON_ENABLE_PERF_ANALYZER "Enable Performance Analyzer" OFF) option(TRITON_ENABLE_PERF_ANALYZER_C_API "Enable Performance Analyzer C API" OFF) option(TRITON_ENABLE_PERF_ANALYZER_TFS "Enable TensorFlow Serving support for Performance Analyzer" OFF) option(TRITON_ENABLE_PERF_ANALYZER_TS "Enable TorchServe support for Performance Analyzer" OFF) +option(TRITON_ENABLE_PERF_ANALYZER_OPENAI "Enable OpenAI support for Performance Analyzer" OFF) option(TRITON_ENABLE_EXAMPLES "Include examples in build" OFF) option(TRITON_ENABLE_TESTS "Include tests in build" OFF) option(TRITON_ENABLE_GPU "Enable GPU support in libraries" OFF) @@ -142,6 +143,9 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_TS}) message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_TS=ON requires TRITON_ENABLE_PERF_ANALYZER=ON") endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_TS + if(NOT ${TRITON_ENABLE_PERF_ANALYZER} AND ${TRITON_ENABLE_PERF_ANALYZER_OPENAI}) + message(FATAL_ERROR "TRITON_ENABLE_PERF_ANALYZER_OPENAI=ON requires TRITON_ENABLE_PERF_ANALYZER=ON") + endif() # NOT TRITON_ENABLE_PERF_ANALYZER AND TRITON_ENABLE_PERF_ANALYZER_OPENAI ExternalProject_Add(cc-clients PREFIX cc-clients @@ -167,6 +171,7 @@ if(TRITON_ENABLE_CC_HTTP OR TRITON_ENABLE_CC_GRPC OR TRITON_ENABLE_PERF_ANALYZER -DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API} -DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS} -DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS} + -DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI} -DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES} -DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS} -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU} @@ -209,6 +214,7 @@ if(TRITON_ENABLE_PYTHON_HTTP OR TRITON_ENABLE_PYTHON_GRPC) -DTRITON_ENABLE_PERF_ANALYZER_C_API:BOOL=${TRITON_ENABLE_PERF_ANALYZER_C_API} -DTRITON_ENABLE_PERF_ANALYZER_TFS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TFS} -DTRITON_ENABLE_PERF_ANALYZER_TS:BOOL=${TRITON_ENABLE_PERF_ANALYZER_TS} + -DTRITON_ENABLE_PERF_ANALYZER_OPENAI:BOOL=${TRITON_ENABLE_PERF_ANALYZER_OPENAI} -DTRITON_ENABLE_EXAMPLES:BOOL=${TRITON_ENABLE_EXAMPLES} -DTRITON_ENABLE_TESTS:BOOL=${TRITON_ENABLE_TESTS} -DTRITON_ENABLE_GPU:BOOL=${TRITON_ENABLE_GPU} diff --git a/src/c++/perf_analyzer/CMakeLists.txt b/src/c++/perf_analyzer/CMakeLists.txt index bebdba4d5..fe34ace4f 100644 --- a/src/c++/perf_analyzer/CMakeLists.txt +++ b/src/c++/perf_analyzer/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -170,6 +170,13 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS) ) endif() +if(TRITON_ENABLE_PERF_ANALYZER_OPENAI) + target_compile_definitions( + client-backend-library + PUBLIC TRITON_ENABLE_PERF_ANALYZER_OPENAI=1 + ) +endif() + install( TARGETS perf_analyzer RUNTIME DESTINATION bin diff --git a/src/c++/perf_analyzer/client_backend/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/CMakeLists.txt index 23da6f32e..2c780ee22 100644 --- a/src/c++/perf_analyzer/client_backend/CMakeLists.txt +++ b/src/c++/perf_analyzer/client_backend/CMakeLists.txt @@ -1,4 +1,4 @@ -# Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -43,6 +43,10 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS) add_subdirectory(torchserve) endif() +if(TRITON_ENABLE_PERF_ANALYZER_OPENAI) + add_subdirectory(openai) +endif() + set( CLIENT_BACKEND_SRCS client_backend.cc @@ -71,6 +75,12 @@ if(TRITON_ENABLE_PERF_ANALYZER_TS) set(TS_TARGET_INCLUDE_DIRECTORY PRIVATE $) endif() +if(TRITON_ENABLE_PERF_ANALYZER_OPENAI) + set(OPENAI_LIBRARY $) + set(OPENAI_TARGET_LINK_LIBRARY PUBLIC $) + set(OPENAI_TARGET_INCLUDE_DIRECTORY PRIVATE $) +endif() + add_library( client-backend-library ${CLIENT_BACKEND_SRCS} @@ -80,6 +90,7 @@ add_library( ${CAPI_LIBRARY} ${TFS_LIBRARY} ${TS_LIBRARY} + ${OPENAI_LIBRARY} ) target_link_libraries( @@ -89,6 +100,7 @@ target_link_libraries( ${CAPI_TARGET_LINK_LIBRARY} ${TFS_TARGET_LINK_LIBRARY} ${TS_TARGET_LINK_LIBRARY} + ${OPENAI_TARGET_LINK_LIBRARY} ) target_include_directories( @@ -97,4 +109,5 @@ target_include_directories( ${CAPI_TARGET_INCLUDE_DIRECTORY} ${TFS_TARGET_INCLUDE_DIRECTORY} ${TS_TARGET_INCLUDE_DIRECTORY} + ${OPENAI_TARGET_INCLUDE_DIRECTORY} ) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.h b/src/c++/perf_analyzer/client_backend/client_backend.h index 988957e98..487c215ce 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.h +++ b/src/c++/perf_analyzer/client_backend/client_backend.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt new file mode 100644 index 000000000..d9b5db33f --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt @@ -0,0 +1,56 @@ +# Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. 
+# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +cmake_minimum_required (VERSION 3.18) + +set( + OPENAI_CLIENT_BACKEND_SRCS + openai_client_backend.cc +) + +set( + OPENAI_CLIENT_BACKEND_HDRS + openai_client_backend.h + openai_http_client.h +) + +add_library( + openai-client-backend-library EXCLUDE_FROM_ALL OBJECT + ${OPENAI_CLIENT_BACKEND_SRCS} + ${OPENAI_CLIENT_BACKEND_HDRS} +) + +target_link_libraries( + openai-client-backend-library + # TODO: Assuming we'll need curl libs + PUBLIC CURL::libcurl + PUBLIC httpclient_static +) + +if(${TRITON_ENABLE_GPU}) + target_include_directories(openai-client-backend-library PUBLIC ${CUDA_INCLUDE_DIRS}) + target_link_libraries(openai-client-backend-library PRIVATE ${CUDA_LIBRARIES}) +endif() # TRITON_ENABLE_GPU diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc new file mode 100644 index 000000000..d9cca25d9 --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc @@ -0,0 +1,154 @@ +// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "openai_client_backend.h" + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +//============================================================================== + +Error +OpenAiClientBackend::Create( + const std::string& url, const ProtocolType protocol, + std::shared_ptr http_headers, const bool verbose, + std::unique_ptr* client_backend) +{ + if (protocol == ProtocolType::GRPC) { + return Error( + "perf_analyzer does not support gRPC protocol with OpenAI endpoints"); + } + std::unique_ptr openai_client_backend( + new OpenAiClientBackend(http_headers)); + + // TODO: Adjust as needed + RETURN_IF_CB_ERROR(HttpClient::Create( + &(openai_client_backend->http_client_), url, verbose)); + + *client_backend = std::move(openai_client_backend); + + return Error::Success; +} + +Error +OpenAiClientBackend::AsyncInfer( + OnCompleteFn callback, const InferOptions& options, + const std::vector& inputs, + const std::vector& outputs) +{ + auto wrapped_callback = [callback](cb::openai::InferResult* client_result) { + cb::InferResult* result = new OpenAiInferResult(client_result); + callback(result); + }; + + // TODO: make an async infer call + //RETURN_IF_CB_ERROR(http_client_->AsyncInfer(...)); + + return Error::Success; +} + + +Error +OpenAiClientBackend::ClientInferStat(InferStat* infer_stat) +{ + // Reusing the common library utilities to collect and report the + // client side statistics. 
+ tc::InferStat client_infer_stat; + + RETURN_IF_TRITON_ERROR(http_client_->ClientInferStat(&client_infer_stat)); + + ParseInferStat(client_infer_stat, infer_stat); + + return Error::Success; +} + +void +OpenAiClientBackend::ParseInferStat( + const tc::InferStat& tfserve_infer_stat, InferStat* infer_stat) +{ + // TODO: Implement + return; +} + +//============================================================================== + +Error +OpenAiInferRequestedOutput::Create( + InferRequestedOutput** infer_output, const std::string& name) +{ + OpenAiInferRequestedOutput* local_infer_output = + new OpenAiInferRequestedOutput(name); + + tc::InferRequestedOutput* openai_infer_output; + RETURN_IF_TRITON_ERROR( + tc::InferRequestedOutput::Create(&openai_infer_output, name)); + local_infer_output->output_.reset(openai_infer_output); + + *infer_output = local_infer_output; + + return Error::Success; +} + +OpenAiInferRequestedOutput::OpenAiInferRequestedOutput( + const std::string& name) + : InferRequestedOutput(BackendKind::OPENAI, name) +{ +} + +//============================================================================== + +OpenAiInferResult::OpenAiInferResult(cb::openai::InferResult* result) +{ + result_.reset(result); +} + +Error +OpenAiInferResult::Id(std::string* id) const +{ + id->clear(); + return Error::Success; +} + +Error +OpenAiInferResult::RequestStatus() const +{ + RETURN_IF_CB_ERROR(result_->RequestStatus()); + return Error::Success; +} + +Error +OpenAiInferResult::RawData( + const std::string& output_name, const uint8_t** buf, + size_t* byte_size) const +{ + return Error( + "Output retrieval is not currently supported for OpenAi client backend"); +} + +//============================================================================== + + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h new file mode 100644 index 000000000..c6c83222f --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h @@ -0,0 +1,130 @@ +// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#pragma once + +#include + +#include "../../perf_utils.h" +#include "../client_backend.h" +#include "openai_http_client.h" + +#define RETURN_IF_TRITON_ERROR(S) \ + do { \ + const tc::Error& status__ = (S); \ + if (!status__.IsOk()) { \ + return Error(status__.Message()); \ + } \ + } while (false) + +namespace tc = triton::client; +namespace cb = triton::perfanalyzer::clientbackend; + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + + +//============================================================================== +/// OpenAiClientBackend is used to generate load on the serving instance, +/// which supports OpenAI Chat Completions API +/// +class OpenAiClientBackend : public ClientBackend { + public: + /// Create an OpenAI client backend which can be used to interact with the + /// server. + /// \param url The inference server url and port. + /// \param protocol The protocol type used. + /// \param http_headers Map of HTTP headers. The map key/value indicates + /// the header name/value. + /// \param verbose Enables the verbose mode. + /// \param client_backend Returns a new OpenAiClientBackend + /// object. + /// \return Error object indicating success or failure. + static Error Create( + const std::string& url, const ProtocolType protocol, + std::shared_ptr http_headers, const bool verbose, + std::unique_ptr* client_backend); + + /// See ClientBackend::AsyncInfer() + Error AsyncInfer( + OnCompleteFn callback, const InferOptions& options, + const std::vector& inputs, + const std::vector& outputs) override; + + /// See ClientBackend::ClientInferStat() + Error ClientInferStat(InferStat* infer_stat) override; + + private: + OpenAiClientBackend(std::shared_ptr http_headers) + : ClientBackend(BackendKind::OPENAI), http_headers_(http_headers) + { + } + + void ParseInferStat( + const tc::InferStat& openai_infer_stat, InferStat* infer_stat); + + std::unique_ptr http_client_; + std::shared_ptr http_headers_; +}; + +//============================================================== +/// OpenAiInferRequestedOutput is a wrapper around +/// InferRequestedOutput object of triton common client library. +/// +class OpenAiInferRequestedOutput : public InferRequestedOutput { + public: + static Error Create( + InferRequestedOutput** infer_output, const std::string& name); + /// Returns the raw InferRequestedOutput object required by OpenAi client + /// library. + tc::InferRequestedOutput* Get() const { return output_.get(); } + + private: + explicit OpenAiInferRequestedOutput(const std::string& name); + + std::unique_ptr output_; +}; + +//============================================================== +/// OpenAiInferResult is a wrapper around InferResult object of +/// OpenAi InferResult object. 
+/// +class OpenAiInferResult : public cb::InferResult { + public: + explicit OpenAiInferResult(cb::openai::InferResult* result); + /// See InferResult::Id() + Error Id(std::string* id) const override; + /// See InferResult::RequestStatus() + Error RequestStatus() const override; + /// See InferResult::RawData() + Error RawData( + const std::string& output_name, const uint8_t** buf, + size_t* byte_size) const override; + + private: + std::unique_ptr result_; +}; + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h new file mode 100644 index 000000000..03e3f489f --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h @@ -0,0 +1,94 @@ +// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#pragma once + +#include "../client_backend.h" +#include "common.h" + + +namespace tc = triton::client; + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +class InferResult; +class HttpInferRequest; + +//============================================================================== +/// An HttpClient object is used to perform any kind of communication with the +/// OpenAi service using +/// +/// \code +/// std::unique_ptr client; +/// HttpClient::Create(&client, "localhost:8080"); +/// ... +/// ... +/// \endcode +/// +class HttpClient : public tc::InferenceServerClient { + public: + ~HttpClient(); + + /// TODO: Adjust as needed + /// Create a client that can be used to communicate with the server. + /// \param client Returns a new InferenceServerHttpClient object. + /// \param server_url The inference server name and port. + /// \param verbose If true generate verbose output when contacting + /// the inference server. + /// \return Error object indicating success or failure. 
+ static Error Create( + std::unique_ptr* client, const std::string& server_url, + const bool verbose); + + private: + HttpClient(const std::string& url, bool verbose); + + // The server url + const std::string url_; +}; + +//====================================================================== + +class InferResult { + public: + static Error Create( + InferResult** infer_result, + std::shared_ptr infer_request); + Error RequestStatus() const; + Error Id(std::string* id) const; + + private: + InferResult(std::shared_ptr infer_request); + + // The status of the inference + Error status_; + // The pointer to the HttpInferRequest object + std::shared_ptr infer_request_; +}; + +//====================================================================== + +}}}} // namespace triton::perfanalyzer::clientbackend::openai From 36e6cebf6dc37d80f6b5ba139e4e2165311b4fab Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 28 Feb 2024 13:13:38 -0600 Subject: [PATCH 04/23] Create OpenAI backend --- .../client_backend/client_backend.cc | 20 +++++++ .../client_backend/openai/CMakeLists.txt | 2 +- .../openai/openai_http_client.cc | 60 +++++++++++++++++++ .../openai/openai_http_client.h | 8 +-- 4 files changed, 85 insertions(+), 5 deletions(-) create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc index 95b3ae0b6..282c6e181 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/client_backend.cc @@ -32,6 +32,10 @@ #include "triton_c_api/triton_c_api_backend.h" #endif // TRITON_ENABLE_PERF_ANALYZER_C_API +#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI +#include "openai/openai_client_backend.h" +#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI + #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS #include "tensorflow_serving/tfserve_client_backend.h" #endif // TRITON_ENABLE_PERF_ANALYZER_TFS @@ -172,6 +176,13 @@ ClientBackend::Create( metrics_url, input_tensor_format, output_tensor_format, &local_backend)); } +#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI + // TODO -- I think this needs endpoint to be passed in? 
+ else if (kind == OPENAI) { + RETURN_IF_CB_ERROR(openai::OpenAiClientBackend::Create( + url, protocol, http_headers, verbose, &local_backend)); + } +#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS else if (kind == TENSORFLOW_SERVING) { RETURN_IF_CB_ERROR(tfserving::TFServeClientBackend::Create( @@ -421,6 +432,15 @@ InferInput::Create( RETURN_IF_CB_ERROR(tritonremote::TritonInferInput::Create( infer_input, name, dims, datatype)); } +#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI + else if (kind == OPENAI) { + RETURN_IF_CB_ERROR( + // FIXME TODO TKG + // openai::OpenAiInferInput::Create(infer_input, name, dims, datatype)); + tritonremote::TritonInferInput::Create( + infer_input, name, dims, datatype)); + } +#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS else if (kind == TENSORFLOW_SERVING) { RETURN_IF_CB_ERROR(tfserving::TFServeInferInput::Create( diff --git a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt index d9b5db33f..ec839a2b3 100644 --- a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt +++ b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt @@ -29,6 +29,7 @@ cmake_minimum_required (VERSION 3.18) set( OPENAI_CLIENT_BACKEND_SRCS openai_client_backend.cc + openai_http_client.cc ) set( @@ -45,7 +46,6 @@ add_library( target_link_libraries( openai-client-backend-library - # TODO: Assuming we'll need curl libs PUBLIC CURL::libcurl PUBLIC httpclient_static ) diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc new file mode 100644 index 000000000..5263407c3 --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc @@ -0,0 +1,60 @@ +// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +#include "openai_http_client.h" + + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + + +Error +HttpClient::Create( + std::unique_ptr* client, const std::string& server_url, + bool verbose) +{ + client->reset(new HttpClient(server_url, verbose)); + return Error::Success; +} + + +HttpClient::HttpClient(const std::string& url, bool verbose) + : InferenceServerClient(verbose), url_(url) +// ,easy_handle_(reinterpret_cast(curl_easy_init()) // TODO FIXME TKG +{ +} + +HttpClient::~HttpClient() +{ + exiting_ = true; + + // FIXME TODO TKG + // if (easy_handle_ != nullptr) { + // curl_easy_cleanup(reinterpret_cast(easy_handle_)); + //} +} + +}}}} // namespace triton::perfanalyzer::clientbackend::openai \ No newline at end of file diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h index 03e3f489f..67f7d9144 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h @@ -1,4 +1,4 @@ -// Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved. +// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions @@ -65,7 +65,7 @@ class HttpClient : public tc::InferenceServerClient { private: HttpClient(const std::string& url, bool verbose); - + // The server url const std::string url_; }; @@ -77,8 +77,8 @@ class InferResult { static Error Create( InferResult** infer_result, std::shared_ptr infer_request); - Error RequestStatus() const; - Error Id(std::string* id) const; + Error RequestStatus() const { return Error::Success; } // TODO FIXME TKG + Error Id(std::string* id) const { return Error::Success; } // TODO FIXME TKG private: InferResult(std::shared_ptr infer_request); From ccd0b6876c46b17ab7212333b5c0cfbbbf25696a Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 28 Feb 2024 17:43:11 -0600 Subject: [PATCH 05/23] New JSON datatype for PA. 
Show json data available at http_client level --- .../client_backend/client_backend.cc | 5 +- .../client_backend/openai/CMakeLists.txt | 2 + .../openai/openai_client_backend.cc | 14 +-- .../openai/openai_http_client.cc | 60 ++++++++++ .../openai/openai_http_client.h | 12 ++ .../openai/openai_infer_input.cc | 112 ++++++++++++++++++ .../openai/openai_infer_input.h | 76 ++++++++++++ src/c++/perf_analyzer/perf_utils.cc | 23 ++++ 8 files changed, 292 insertions(+), 12 deletions(-) create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc index 282c6e181..869762942 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/client_backend.cc @@ -435,10 +435,7 @@ InferInput::Create( #ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI else if (kind == OPENAI) { RETURN_IF_CB_ERROR( - // FIXME TODO TKG - // openai::OpenAiInferInput::Create(infer_input, name, dims, datatype)); - tritonremote::TritonInferInput::Create( - infer_input, name, dims, datatype)); + openai::OpenAiInferInput::Create(infer_input, name, dims, datatype)); } #endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS diff --git a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt index ec839a2b3..3ef867e9f 100644 --- a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt +++ b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt @@ -30,12 +30,14 @@ set( OPENAI_CLIENT_BACKEND_SRCS openai_client_backend.cc openai_http_client.cc + openai_infer_input.cc ) set( OPENAI_CLIENT_BACKEND_HDRS openai_client_backend.h openai_http_client.h + openai_infer_input.h ) add_library( diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc index d9cca25d9..d017b8b23 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc @@ -42,11 +42,10 @@ OpenAiClientBackend::Create( "perf_analyzer does not support gRPC protocol with OpenAI endpoints"); } std::unique_ptr openai_client_backend( - new OpenAiClientBackend(http_headers)); + new OpenAiClientBackend(http_headers)); - // TODO: Adjust as needed - RETURN_IF_CB_ERROR(HttpClient::Create( - &(openai_client_backend->http_client_), url, verbose)); + RETURN_IF_CB_ERROR( + HttpClient::Create(&(openai_client_backend->http_client_), url, verbose)); *client_backend = std::move(openai_client_backend); @@ -64,8 +63,8 @@ OpenAiClientBackend::AsyncInfer( callback(result); }; - // TODO: make an async infer call - //RETURN_IF_CB_ERROR(http_client_->AsyncInfer(...)); + RETURN_IF_CB_ERROR(http_client_->AsyncInfer( + wrapped_callback, options, inputs, outputs, *http_headers_)); return Error::Success; } @@ -112,8 +111,7 @@ OpenAiInferRequestedOutput::Create( return Error::Success; } -OpenAiInferRequestedOutput::OpenAiInferRequestedOutput( - const std::string& name) +OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(const std::string& name) : InferRequestedOutput(BackendKind::OPENAI, name) { } diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc index 
5263407c3..151eca2a6 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc @@ -26,6 +26,8 @@ #include "openai_http_client.h" +#include + namespace triton { namespace perfanalyzer { namespace clientbackend { namespace openai { @@ -40,6 +42,64 @@ HttpClient::Create( return Error::Success; } +Error +HttpClient::AsyncInfer( + OpenAiOnCompleteFn callback, const InferOptions& options, + const std::vector& inputs, + const std::vector& outputs, + const Headers& headers) +{ + // TODO FIXME implement + + // TODO FIXME cleanup or remove this. It just proves the json data arrives + rapidjson::Document d{}; + + if (inputs.size() != 1) { + return Error("Only expecting one input"); + } + + auto raw_input = dynamic_cast(inputs[0]); + + raw_input->PrepareForRequest(); + bool end_of_input = false; + const uint8_t* buf; + size_t buf_size; + raw_input->GetNext(&buf, &buf_size, &end_of_input); + if (!end_of_input) { + return Error("Unexpected multiple json data inputs"); + } + if (buf == nullptr) { + return Error("Unexpected null json data"); + } + + std::string json_str(reinterpret_cast(buf), buf_size); + std::cout << "FIXME TODO: JSON data string is " << json_str << std::endl; + + + if (d.Parse(json_str.c_str()).HasParseError()) { + return Error("Unable to parse json string: " + json_str); + } + + // FIXME TKG -- where/how would the 'streaming' option get plugged in? + + // FIXME TKG -- GOOD GOD! Is it this hard to add a single value into a json + // object?? + // FIXME TKG -- what if the user supplied this in the input json file? + d.AddMember( + "model", + rapidjson::Value().SetString( + options.model_name_.c_str(), + static_cast(options.model_name_.length()), + d.GetAllocator()), + d.GetAllocator()); + + for (auto itr = d.MemberBegin(); itr != d.MemberEnd(); ++itr) { + std::cout << "FIXME TODO: valid JSON object has key " + << itr->name.GetString() << std::endl; + } + + return Error::Success; +} HttpClient::HttpClient(const std::string& url, bool verbose) : InferenceServerClient(verbose), url_(url) diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h index 67f7d9144..bbdaddfe9 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h @@ -27,6 +27,7 @@ #include "../client_backend.h" #include "common.h" +#include "openai_infer_input.h" namespace tc = triton::client; @@ -37,6 +38,8 @@ namespace openai { class InferResult; class HttpInferRequest; +using OpenAiOnCompleteFn = std::function; + //============================================================================== /// An HttpClient object is used to perform any kind of communication with the /// OpenAi service using @@ -63,6 +66,15 @@ class HttpClient : public tc::InferenceServerClient { std::unique_ptr* client, const std::string& server_url, const bool verbose); + /// TODO FIXME: Update + /// Run asynchronous inference on server. 
+ Error AsyncInfer( + OpenAiOnCompleteFn callback, const InferOptions& options, + const std::vector& inputs, + const std::vector& outputs = + std::vector(), + const Headers& headers = Headers()); + private: HttpClient(const std::string& url, bool verbose); diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc new file mode 100644 index 000000000..70d827e85 --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc @@ -0,0 +1,112 @@ +// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "openai_infer_input.h" + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +Error +OpenAiInferInput::Create( + InferInput** infer_input, const std::string& name, + const std::vector& dims, const std::string& datatype) +{ + OpenAiInferInput* local_infer_input = + new OpenAiInferInput(name, dims, datatype); + + *infer_input = local_infer_input; + return Error::Success; +} + +Error +OpenAiInferInput::SetShape(const std::vector& shape) +{ + shape_ = shape; + return Error::Success; +} + +Error +OpenAiInferInput::Reset() +{ + bufs_.clear(); + buf_byte_sizes_.clear(); + bufs_idx_ = 0; + byte_size_ = 0; + return Error::Success; +} + +Error +OpenAiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) +{ + byte_size_ += input_byte_size; + + bufs_.push_back(input); + buf_byte_sizes_.push_back(input_byte_size); + + return Error::Success; +} + +Error +OpenAiInferInput::ByteSize(size_t* byte_size) const +{ + *byte_size = byte_size_; + return Error::Success; +} + +Error +OpenAiInferInput::PrepareForRequest() +{ + // Reset position so request sends entire input. 
+ bufs_idx_ = 0; + buf_pos_ = 0; + return Error::Success; +} + +Error +OpenAiInferInput::GetNext( + const uint8_t** buf, size_t* input_bytes, bool* end_of_input) +{ + if (bufs_idx_ < bufs_.size()) { + *buf = bufs_[bufs_idx_]; + *input_bytes = buf_byte_sizes_[bufs_idx_]; + bufs_idx_++; + } else { + *buf = nullptr; + *input_bytes = 0; + } + *end_of_input = (bufs_idx_ >= bufs_.size()); + + return Error::Success; +} + +OpenAiInferInput::OpenAiInferInput( + const std::string& name, const std::vector& dims, + const std::string& datatype) + : InferInput(BackendKind::TENSORFLOW_SERVING, name, datatype), shape_(dims) +{ +} + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h new file mode 100644 index 000000000..a10b9312f --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h @@ -0,0 +1,76 @@ +// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#pragma once + +#include + +#include "../../perf_utils.h" +#include "../client_backend.h" + + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +//============================================================== +/// OpenAiInferInput instance holds the information regarding +/// model input tensors and their corresponding generated data. +/// +class OpenAiInferInput : public InferInput { + public: + static Error Create( + InferInput** infer_input, const std::string& name, + const std::vector& dims, const std::string& datatype); + /// See InferInput::Shape() + const std::vector& Shape() const override { return shape_; } + /// See InferInput::SetShape() + Error SetShape(const std::vector& shape) override; + /// See InferInput::Reset() + Error Reset() override; + /// See InferInput::AppendRaw() + Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; + /// Gets the size of data added into this input in bytes. 
+ /// \param byte_size The size of data added in bytes. + /// \return Error object indicating success or failure. + Error ByteSize(size_t* byte_size) const; + /// Resets the heads to start providing data from the beginning. + Error PrepareForRequest(); + /// Get the next chunk of data if available. + Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input); + + private: + explicit OpenAiInferInput( + const std::string& name, const std::vector& dims, + const std::string& datatype); + + std::vector shape_; + size_t byte_size_{0}; + + size_t bufs_idx_, buf_pos_; + std::vector bufs_; + std::vector buf_byte_sizes_; +}; + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/perf_utils.cc b/src/c++/perf_analyzer/perf_utils.cc index d6c0a8a37..4c02f56ca 100644 --- a/src/c++/perf_analyzer/perf_utils.cc +++ b/src/c++/perf_analyzer/perf_utils.cc @@ -27,6 +27,8 @@ #include "perf_utils.h" #include +#include +#include #include #include @@ -200,6 +202,25 @@ SerializeExplicitTensor( std::copy( serialized.begin(), serialized.end(), std::back_inserter(*decoded_data)); + } else if (dt.compare("JSON") == 0) { + std::string serialized = ""; + + for (const auto& value : tensor.GetArray()) { + rapidjson::StringBuffer buffer; + rapidjson::Writer writer(buffer); + value.Accept(writer); + + std::string element = buffer.GetString(); + uint32_t len = element.size(); + // FIXME TODO - for BYTES we add the length. Is there any reason that + // would be needed here? + // serialized.append(reinterpret_cast(&len), + // sizeof(uint32_t)); + serialized.append(element); + } + std::copy( + serialized.begin(), serialized.end(), + std::back_inserter(*decoded_data)); } else { for (const auto& value : tensor.GetArray()) { if (dt.compare("BOOL") == 0) { @@ -298,6 +319,8 @@ SerializeExplicitTensor( double element(value.GetDouble()); const char* src = reinterpret_cast(&element); decoded_data->insert(decoded_data->end(), src, src + sizeof(double)); + } else { + return cb::Error("Unexpected type " + dt); } } } From 4b841b331ea64a3fe5163327ed71eaf76f1e5efd Mon Sep 17 00:00:00 2001 From: tgerdes Date: Fri, 1 Mar 2024 15:16:15 -0600 Subject: [PATCH 06/23] Add an output to OpenAI models --- .../perf_analyzer/client_backend/client_backend.cc | 6 ++++++ src/c++/perf_analyzer/model_parser.cc | 14 ++++++++++---- 2 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc index 869762942..04a68fefb 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/client_backend.cc @@ -522,6 +522,12 @@ InferRequestedOutput::Create( RETURN_IF_CB_ERROR(tritonremote::TritonInferRequestedOutput::Create( infer_output, name, class_count)); } +#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI + else if (kind == OPENAI) { + RETURN_IF_CB_ERROR( + openai::OpenAiInferRequestedOutput::Create(infer_output, name)); + } +#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS else if (kind == TENSORFLOW_SERVING) { RETURN_IF_CB_ERROR( diff --git a/src/c++/perf_analyzer/model_parser.cc b/src/c++/perf_analyzer/model_parser.cc index 7dcd59819..30e149c0c 100644 --- a/src/c++/perf_analyzer/model_parser.cc +++ b/src/c++/perf_analyzer/model_parser.cc @@ -277,10 +277,16 @@ ModelParser::InitOpenAI( max_batch_size_ = batch_size; // OpenAI will take a single json input with a fully formed payload - auto it = 
inputs_->emplace("payload", ModelTensor()).first; - it->second.name_ = "payload"; - it->second.datatype_ = "JSON"; - it->second.shape_.push_back(1); + auto in_it = inputs_->emplace("payload", ModelTensor()).first; + in_it->second.name_ = "payload"; + in_it->second.datatype_ = "JSON"; + in_it->second.shape_.push_back(1); + + // OpenAI will reply with a single json output + auto out_it = outputs_->emplace("response", ModelTensor()).first; + out_it->second.name_ = "response"; + out_it->second.datatype_ = "JSON"; + out_it->second.shape_.push_back(1); return cb::Error::Success; } From e9f4a221ce254cdd83a50a2634fa7624db96fcf9 Mon Sep 17 00:00:00 2001 From: GuanLuo <41310872+GuanLuo@users.noreply.github.com> Date: Mon, 4 Mar 2024 08:00:27 -0800 Subject: [PATCH 07/23] Add OpenAI client (#482) * Add OpenAI client * Address comment --- .../client_backend/openai/CMakeLists.txt | 8 +- .../client_backend/openai/http_client.cc | 267 ++++++++++++++++ .../client_backend/openai/http_client.h | 191 +++++++++++ .../client_backend/openai/openai_client.cc | 298 ++++++++++++++++++ .../client_backend/openai/openai_client.h | 181 +++++++++++ .../openai/openai_client_backend.cc | 65 +--- .../openai/openai_client_backend.h | 25 +- .../openai/openai_http_client.cc | 120 ------- .../openai/openai_http_client.h | 106 ------- .../openai/openai_infer_input.cc | 38 +-- .../openai/openai_infer_input.h | 13 +- 11 files changed, 971 insertions(+), 341 deletions(-) create mode 100644 src/c++/perf_analyzer/client_backend/openai/http_client.cc create mode 100644 src/c++/perf_analyzer/client_backend/openai/http_client.h create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_client.cc create mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_client.h delete mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc delete mode 100644 src/c++/perf_analyzer/client_backend/openai/openai_http_client.h diff --git a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt index 3ef867e9f..93963e378 100644 --- a/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt +++ b/src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt @@ -28,15 +28,17 @@ cmake_minimum_required (VERSION 3.18) set( OPENAI_CLIENT_BACKEND_SRCS + http_client.cc openai_client_backend.cc - openai_http_client.cc + openai_client.cc openai_infer_input.cc ) set( OPENAI_CLIENT_BACKEND_HDRS + http_client.h openai_client_backend.h - openai_http_client.h + openai_client.h openai_infer_input.h ) @@ -48,7 +50,7 @@ add_library( target_link_libraries( openai-client-backend-library - PUBLIC CURL::libcurl + PUBLIC CURL::libcurl PUBLIC httpclient_static ) diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.cc b/src/c++/perf_analyzer/client_backend/openai/http_client.cc new file mode 100644 index 000000000..4c8632c52 --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.cc @@ -0,0 +1,267 @@ +// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +#include "http_client.h" + +#include +#include + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +HttpRequest::HttpRequest( + std::function&& completion_callback, const bool verbose) + : completion_callback_(std::move(completion_callback)), verbose_(verbose) +{ +} + +HttpRequest::~HttpRequest() +{ + if (header_list_ != nullptr) { + curl_slist_free_all(header_list_); + header_list_ = nullptr; + } +} + +void +HttpRequest::AddInput(uint8_t* buf, size_t byte_size) +{ + data_buffers_.push_back(std::pair(buf, byte_size)); + total_input_byte_size_ += byte_size; +} + +void +HttpRequest::GetNextInput(uint8_t* buf, size_t size, size_t* input_bytes) +{ + *input_bytes = 0; + + while (!data_buffers_.empty() && size > 0) { + const size_t csz = std::min(data_buffers_.front().second, size); + if (csz > 0) { + const uint8_t* input_ptr = data_buffers_.front().first; + std::copy(input_ptr, input_ptr + csz, buf); + size -= csz; + buf += csz; + *input_bytes += csz; + + data_buffers_.front().first += csz; + data_buffers_.front().second -= csz; + } + if (data_buffers_.front().second == 0) { + data_buffers_.pop_front(); + } + } +} + +HttpClient::HttpClient( + const std::string& server_url, bool verbose, + const HttpSslOptions& ssl_options) + : url_(server_url), verbose_(verbose), ssl_options_(ssl_options) +{ + auto* ver = curl_version_info(CURLVERSION_NOW); + if (ver->features & CURL_VERSION_THREADSAFE == 0) { + throw std::runtime_error( + "HTTP client has dependency on CURL library to have thread-safe " + "support (CURL_VERSION_THREADSAFE set)"); + } + if (curl_global_init(CURL_GLOBAL_ALL) != 0) { + throw std::runtime_error("CURL global initialization failed"); + } + + multi_handle_ = curl_multi_init(); + + worker_ = std::thread(&HttpClient::AsyncTransfer, this); +} + +HttpClient::~HttpClient() +{ + exiting_ = true; + + // thread not joinable if AsyncInfer() is not called + // (it is default constructed thread before the first AsyncInfer() call) + if (worker_.joinable()) { + cv_.notify_all(); + worker_.join(); + } + + for (auto& request : ongoing_async_requests_) { + CURL* easy_handle = reinterpret_cast(request.first); + curl_multi_remove_handle(multi_handle_, easy_handle); + curl_easy_cleanup(easy_handle); + } + curl_multi_cleanup(multi_handle_); + + curl_global_cleanup(); +} + 
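The constructor and destructor above establish the lifecycle of the shared curl multi handle and of the worker thread that runs AsyncTransfer(). For orientation, the following is a minimal, illustrative sketch (not part of this patch) of how a specialized client is expected to build on this base class: construct an HttpRequest with a completion callback, attach the body with AddInput(), configure a curl easy handle, and hand both to the non-blocking Send(). The names MinimalClient, Post, and ReadBody are hypothetical; the real derived client in this series is ChatCompletionClient.

// Illustrative sketch only -- not part of this patch. Response handling is
// omitted (see ChatCompletionClient's ResponseHandler later in this series).
class MinimalClient : public HttpClient {
 public:
  explicit MinimalClient(const std::string& url, bool verbose = false)
      : HttpClient(url, verbose)
  {
  }

  void Post(std::string& body, std::function<void(HttpRequest*)> on_done)
  {
    // HttpRequest stores raw pointers only, so 'body' must outlive the
    // transfer; 'on_done' is invoked from the worker thread started above.
    std::unique_ptr<HttpRequest> request(
        new HttpRequest(std::move(on_done), verbose_));
    request->AddInput(
        reinterpret_cast<uint8_t*>(body.data()), body.size());

    CURL* handle = curl_easy_init();
    curl_easy_setopt(handle, CURLOPT_URL, url_.c_str());
    curl_easy_setopt(handle, CURLOPT_POST, 1L);
    curl_easy_setopt(
        handle, CURLOPT_POSTFIELDSIZE_LARGE,
        static_cast<curl_off_t>(request->total_input_byte_size_));
    // Feed the body through HttpRequest::GetNextInput().
    curl_easy_setopt(handle, CURLOPT_READFUNCTION, ReadBody);
    curl_easy_setopt(handle, CURLOPT_READDATA, request.get());
    SetSSLCurlOptions(handle);

    // Send() does not block; completion is reported via 'on_done'.
    Send(handle, std::move(request));
  }

 private:
  static size_t ReadBody(void* dst, size_t size, size_t nmemb, void* userp)
  {
    size_t copied = 0;
    reinterpret_cast<HttpRequest*>(userp)->GetNextInput(
        reinterpret_cast<uint8_t*>(dst), size * nmemb, &copied);
    return copied;
  }
};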
+const std::string& +HttpClient::ParseSslCertType(HttpSslOptions::CERTTYPE cert_type) +{ + static std::string pem_str{"PEM"}; + static std::string der_str{"DER"}; + switch (cert_type) { + case HttpSslOptions::CERTTYPE::CERT_PEM: + return pem_str; + case HttpSslOptions::CERTTYPE::CERT_DER: + return der_str; + } + throw std::runtime_error( + "Unexpected SSL certificate type encountered. Only PEM and DER are " + "supported."); +} + +const std::string& +HttpClient::ParseSslKeyType(HttpSslOptions::KEYTYPE key_type) +{ + static std::string pem_str{"PEM"}; + static std::string der_str{"DER"}; + switch (key_type) { + case HttpSslOptions::KEYTYPE::KEY_PEM: + return pem_str; + case HttpSslOptions::KEYTYPE::KEY_DER: + return der_str; + } + throw std::runtime_error( + "unsupported SSL key type encountered. Only PEM and DER are " + "supported."); +} + +void +HttpClient::SetSSLCurlOptions(CURL* curl_handle) +{ + curl_easy_setopt( + curl_handle, CURLOPT_SSL_VERIFYPEER, ssl_options_.verify_peer); + curl_easy_setopt( + curl_handle, CURLOPT_SSL_VERIFYHOST, ssl_options_.verify_host); + if (!ssl_options_.ca_info.empty()) { + curl_easy_setopt(curl_handle, CURLOPT_CAINFO, ssl_options_.ca_info.c_str()); + } + const auto& curl_cert_type = ParseSslCertType(ssl_options_.cert_type); + curl_easy_setopt(curl_handle, CURLOPT_SSLCERTTYPE, curl_cert_type.c_str()); + if (!ssl_options_.cert.empty()) { + curl_easy_setopt(curl_handle, CURLOPT_SSLCERT, ssl_options_.cert.c_str()); + } + const auto& curl_key_type = ParseSslKeyType(ssl_options_.key_type); + curl_easy_setopt(curl_handle, CURLOPT_SSLKEYTYPE, curl_key_type.c_str()); + if (!ssl_options_.key.empty()) { + curl_easy_setopt(curl_handle, CURLOPT_SSLKEY, ssl_options_.key.c_str()); + } +} + +void +HttpClient::Send(CURL* handle, std::unique_ptr&& request) +{ + std::lock_guard lock(mutex_); + + auto insert_result = ongoing_async_requests_.emplace( + std::make_pair(reinterpret_cast(handle), std::move(request))); + if (!insert_result.second) { + curl_easy_cleanup(handle); + throw std::runtime_error( + "Failed to insert new asynchronous request context."); + } + curl_multi_add_handle(multi_handle_, handle); + cv_.notify_all(); +} + +void +HttpClient::AsyncTransfer() +{ + int place_holder = 0; + CURLMsg* msg = nullptr; + do { + std::vector> request_list; + + // sleep if no work is available + std::unique_lock lock(mutex_); + cv_.wait(lock, [this] { + if (this->exiting_) { + return true; + } + // wake up if an async request has been generated + return !this->ongoing_async_requests_.empty(); + }); + + CURLMcode mc = curl_multi_perform(multi_handle_, &place_holder); + int numfds; + if (mc == CURLM_OK) { + // Wait for activity. 
If there are no descriptors in the multi_handle_ + // then curl_multi_wait will return immediately + mc = curl_multi_wait(multi_handle_, NULL, 0, INT_MAX, &numfds); + if (mc == CURLM_OK) { + while ((msg = curl_multi_info_read(multi_handle_, &place_holder))) { + uintptr_t identifier = reinterpret_cast(msg->easy_handle); + auto itr = ongoing_async_requests_.find(identifier); + // This shouldn't happen + if (itr == ongoing_async_requests_.end()) { + std::cerr + << "Unexpected error: received completed request that is not " + "in the list of asynchronous requests" + << std::endl; + curl_multi_remove_handle(multi_handle_, msg->easy_handle); + curl_easy_cleanup(msg->easy_handle); + continue; + } + + long http_code = 400; + if (msg->data.result == CURLE_OK) { + curl_easy_getinfo( + msg->easy_handle, CURLINFO_RESPONSE_CODE, &http_code); + } else if (msg->data.result == CURLE_OPERATION_TIMEDOUT) { + http_code = 499; + } + + request_list.emplace_back(std::move(itr->second)); + ongoing_async_requests_.erase(itr); + curl_multi_remove_handle(multi_handle_, msg->easy_handle); + curl_easy_cleanup(msg->easy_handle); + + std::unique_ptr& async_request = request_list.back(); + async_request->http_code_ = http_code; + + if (msg->msg != CURLMSG_DONE) { + // Something wrong happened. + std::cerr << "Unexpected error: received CURLMsg=" << msg->msg + << std::endl; + } + } + } else { + std::cerr << "Unexpected error: curl_multi failed. Code:" << mc + << std::endl; + } + } else { + std::cerr << "Unexpected error: curl_multi failed. Code:" << mc + << std::endl; + } + lock.unlock(); + + for (auto& this_request : request_list) { + this_request->completion_callback_(this_request.get()); + } + } while (!exiting_); +} + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.h b/src/c++/perf_analyzer/client_backend/openai/http_client.h new file mode 100644 index 000000000..c6acfd524 --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.h @@ -0,0 +1,191 @@ +// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#pragma once + +#include + +#include +#include +#include +#include +#include +#include +#include + +// [TODO] Below should already be a generic class for any HTTP use, +// relocate it so that it can be used elsewhere +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +// [FIXME] add back "parameter" handling +// [FIXME] add back "compression" handling + +/// The key-value map type to be included in the request +/// as custom headers. +typedef std::map Headers; +/// The key-value map type to be included as URL parameters. +typedef std::map Parameters; + +// The options for authorizing and authenticating SSL/TLS connections. +struct HttpSslOptions { + enum CERTTYPE { CERT_PEM = 0, CERT_DER = 1 }; + enum KEYTYPE { + KEY_PEM = 0, + KEY_DER = 1 + // TODO: Support loading private key from crypto engine + // KEY_ENG = 2 + }; + explicit HttpSslOptions() + : verify_peer(1), verify_host(2), cert_type(CERTTYPE::CERT_PEM), + key_type(KEYTYPE::KEY_PEM) + { + } + // This option determines whether curl verifies the authenticity of the peer's + // certificate. A value of 1 means curl verifies; 0 (zero) means it does not. + // Default value is 1. See here for more details: + // https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYPEER.html + long verify_peer; + // This option determines whether libcurl verifies that the server cert is for + // the server it is known as. The default value for this option is 2 which + // means that certificate must indicate that the server is the server to which + // you meant to connect, or the connection fails. See here for more details: + // https://curl.se/libcurl/c/CURLOPT_SSL_VERIFYHOST.html + long verify_host; + // File holding one or more certificates to verify the peer with. If not + // specified, client will look for the system path where cacert bundle is + // assumed to be stored, as established at build time. See here for more + // information: https://curl.se/libcurl/c/CURLOPT_CAINFO.html + std::string ca_info; + // The format of client certificate. By default it is CERT_PEM. See here for + // more details: https://curl.se/libcurl/c/CURLOPT_SSLCERTTYPE.html + CERTTYPE cert_type; + // The file name of your client certificate. See here for more details: + // https://curl.se/libcurl/c/CURLOPT_SSLCERT.html + std::string cert; + // The format of the private key. By default it is KEY_PEM. See here for more + // details: https://curl.se/libcurl/c/CURLOPT_SSLKEYTYPE.html. + KEYTYPE key_type; + // The private key. See here for more details: + // https://curl.se/libcurl/c/CURLOPT_SSLKEY.html. + std::string key; +}; + +// HttpRequest object representing the context of a HTTP transaction. Currently +// it is also designed to be the placeholder for response data, but how the +// response is stored can be revisited later. 
+// 'completion_callback' doesn't transfer ownership of HttpRequest, caller must +// not keep the reference and access HttpRequest object after +// 'completion_callback' returns +class HttpRequest { + public: + HttpRequest( + std::function&& completion_callback, + const bool verbose = false); + virtual ~HttpRequest(); + + // Adds the input data to be delivered to the server, note that the HTTP + // request does not own the buffer. + void AddInput(uint8_t* buf, size_t byte_size); + + // Helper function for CURL + // Copy into 'buf' up to 'size' bytes of input data. Return the + // actual amount copied in 'input_bytes'. + void GetNextInput(uint8_t* buf, size_t size, size_t* input_bytes); + + // [FIXME] define default callback like + // CURLOPT_READFUNCTION, CURLOPT_WRITEFUNCTION here? + // the specialized HttpRequest can override the callbacks when read / write + // schema has changed. + + // Buffer that accumulates the response body. + std::string response_buffer_; + + size_t total_input_byte_size_{0}; + + // HTTP response code for the inference request + uint32_t http_code_{200}; + + std::function completion_callback_{nullptr}; + + // Pointer to the list of the HTTP request header, keep it such that it will + // be valid during the transfer and can be freed once transfer is completed. + struct curl_slist* header_list_{nullptr}; + + protected: + const bool verbose_{false}; + + // Pointers to the input data. + std::deque> data_buffers_; +}; + +// Base class for common HTTP functionalities +class HttpClient { + public: + enum class CompressionType { NONE, DEFLATE, GZIP }; + + virtual ~HttpClient(); + + protected: + void SetSSLCurlOptions(CURL* curl_handle); + + HttpClient( + const std::string& server_url, bool verbose = false, + const HttpSslOptions& ssl_options = HttpSslOptions()); + + // Note that this function does not block + void Send(CURL* handle, std::unique_ptr&& request); + + // [FIXME] provide more helper functions to encapsulate CURL detail + + protected: + void AsyncTransfer(); + + bool exiting_{false}; + + std::thread worker_; + std::mutex mutex_; + std::condition_variable cv_; + + // The server url + const std::string url_; + // The options for authorizing and authenticating SSL/TLS connections + HttpSslOptions ssl_options_; + + using AsyncReqMap = std::map>; + // curl multi handle for processing asynchronous requests + void* multi_handle_; + // map to record ongoing asynchronous requests with pointer to easy handle + // or tag id as key + AsyncReqMap ongoing_async_requests_; + + bool verbose_; + + private: + // [FIXME] should belong to SSL option struct as helper function + const std::string& ParseSslKeyType(HttpSslOptions::KEYTYPE key_type); + const std::string& ParseSslCertType(HttpSslOptions::CERTTYPE cert_type); +}; +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc new file mode 100644 index 000000000..f83c3976b --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -0,0 +1,298 @@ +// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. 
+// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. +// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +// Include this first to make sure we are a friend of common classes. +#define TRITON_INFERENCE_SERVER_CLIENT_CLASS InferenceServerHttpClient +#include "openai_client.h" + +#include + +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "common.h" + +#ifdef TRITON_ENABLE_ZLIB +#include +#endif + +extern "C" { +#include "cencode.h" +} + +#ifdef _WIN32 +#define strncasecmp(x, y, z) _strnicmp(x, y, z) +#undef min // NOMINMAX did not resolve std::min compile error +#endif //_WIN32 + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +//============================================================================== + +void +ChatCompletionRequest::SendResponse(bool is_final, bool is_null) +{ + response_callback_(new ChatCompletionResult( + http_code_, std::move(response_buffer_), is_final, is_null, request_id_)); +} + +ChatCompletionClient::ChatCompletionClient( + const std::string& url, bool verbose, const HttpSslOptions& ssl_options) + : HttpClient( + std::string(url + "/v1/chat/completions"), verbose, ssl_options) +{ +} + +size_t +ChatCompletionClient::RequestProvider( + void* contents, size_t size, size_t nmemb, void* userp) +{ + auto request = reinterpret_cast(userp); + + size_t input_bytes = 0; + request->GetNextInput( + reinterpret_cast(contents), size * nmemb, &input_bytes); + + if (input_bytes == 0) { + request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::SEND_END); + } + + return input_bytes; +} + +size_t +ChatCompletionClient::ResponseHeaderHandler( + void* contents, size_t size, size_t nmemb, void* userp) +{ + auto request = reinterpret_cast(userp); + + char* buf = reinterpret_cast(contents); + size_t byte_size = size * nmemb; + + std::string hdr(buf, byte_size); + std::transform(hdr.begin(), hdr.end(), hdr.begin(), [](unsigned char c) { + return std::tolower(c); + }); + if (hdr.find("content-type") != std::string::npos) { + request->is_stream_ = (hdr.find("text/event-stream") != std::string::npos); + } + + return byte_size; +} + +size_t +ChatCompletionClient::ResponseHandler( + void* contents, size_t size, size_t nmemb, void* userp) +{ + // [WIP] verify if the SSE responses received are complete, or the response + // need to be stitched first + auto request = reinterpret_cast(userp); + if 
(request->timer_.Timestamp( + triton::client::RequestTimers::Kind::RECV_START) == 0) { + request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::RECV_START); + } + + char* buf = reinterpret_cast(contents); + size_t result_bytes = size * nmemb; + request->response_buffer_.append(buf, result_bytes); + // Send response now if streaming, otherwise wait until request has been + // completed + if (request->is_stream_) { + // [FIXME] assume it is proper chunked of response + auto done_signal = + (request->response_buffer_.find("data: [DONE]") != std::string::npos); + request->SendResponse( + done_signal /* is_final */, done_signal /* is_null */); + } + + // ResponseHandler may be called multiple times so we overwrite + // RECV_END so that we always have the time of the last. + request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::RECV_END); + + return result_bytes; +} + + +Error +ChatCompletionClient::AsyncInfer( + std::function callback, + std::string& serialized_request_body, const std::string& request_id) +{ + if (callback == nullptr) { + return Error( + "Callback function must be provided along with AsyncInfer() call."); + } + + auto completion_callback = [this](HttpRequest* req) { + auto request = static_cast(req); + if (!request->is_stream_) { + request->SendResponse(true /* is_final */, false /* is_null */); + } + request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::REQUEST_END); + UpdateInferStat(request->timer_); + }; + std::unique_ptr request(new ChatCompletionRequest( + std::move(completion_callback), std::move(callback), request_id, + verbose_)); + auto raw_request = static_cast(request.get()); + raw_request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::REQUEST_START); + request->AddInput( + reinterpret_cast(serialized_request_body.data()), + serialized_request_body.size()); + + CURL* multi_easy_handle = curl_easy_init(); + Error err = PreRunProcessing(multi_easy_handle, raw_request); + if (!err.IsOk()) { + curl_easy_cleanup(multi_easy_handle); + return err; + } + + raw_request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::SEND_START); + Send(multi_easy_handle, std::move(request)); + return Error::Success; +} + +Error +ChatCompletionClient::PreRunProcessing( + CURL* curl, ChatCompletionRequest* request) +{ + curl_easy_setopt(curl, CURLOPT_URL, url_.c_str()); + curl_easy_setopt(curl, CURLOPT_USERAGENT, "libcurl-agent/1.0"); + curl_easy_setopt(curl, CURLOPT_POST, 1L); + curl_easy_setopt(curl, CURLOPT_TCP_NODELAY, 1L); + + if (verbose_) { + curl_easy_setopt(curl, CURLOPT_VERBOSE, 1L); + } + + const long buffer_byte_size = 16 * 1024 * 1024; + curl_easy_setopt(curl, CURLOPT_UPLOAD_BUFFERSIZE, buffer_byte_size); + curl_easy_setopt(curl, CURLOPT_BUFFERSIZE, buffer_byte_size); + + // request data provided by RequestProvider() + curl_easy_setopt(curl, CURLOPT_READFUNCTION, RequestProvider); + curl_easy_setopt(curl, CURLOPT_READDATA, request); + + // response headers handled by ResponseHeaderHandler() + curl_easy_setopt(curl, CURLOPT_HEADERFUNCTION, ResponseHeaderHandler); + curl_easy_setopt(curl, CURLOPT_HEADERDATA, request); + + // response data handled by ResponseHandler() + curl_easy_setopt(curl, CURLOPT_WRITEFUNCTION, ResponseHandler); + curl_easy_setopt(curl, CURLOPT_WRITEDATA, request); + + const curl_off_t post_byte_size = request->total_input_byte_size_; + curl_easy_setopt(curl, CURLOPT_POSTFIELDSIZE_LARGE, post_byte_size); + + SetSSLCurlOptions(curl); + + struct curl_slist* list = nullptr; + 
list = curl_slist_append(list, "Expect:"); + list = curl_slist_append(list, "Content-Type: application/json"); + curl_easy_setopt(curl, CURLOPT_HTTPHEADER, list); + + // The list will be freed when the request is destructed + request->header_list_ = list; + + return Error::Success; +} + +Error +ChatCompletionClient::UpdateInferStat( + const triton::client::RequestTimers& timer) +{ + const uint64_t request_time_ns = timer.Duration( + triton::client::RequestTimers::Kind::REQUEST_START, + triton::client::RequestTimers::Kind::REQUEST_END); + const uint64_t send_time_ns = timer.Duration( + triton::client::RequestTimers::Kind::SEND_START, + triton::client::RequestTimers::Kind::SEND_END); + const uint64_t recv_time_ns = timer.Duration( + triton::client::RequestTimers::Kind::RECV_START, + triton::client::RequestTimers::Kind::RECV_END); + + if ((request_time_ns == std::numeric_limits::max()) || + (send_time_ns == std::numeric_limits::max()) || + (recv_time_ns == std::numeric_limits::max())) { + return Error( + "Timer not set correctly." + + ((timer.Timestamp(triton::client::RequestTimers::Kind::REQUEST_START) > + timer.Timestamp(triton::client::RequestTimers::Kind::REQUEST_END)) + ? (" Request time from " + + std::to_string(timer.Timestamp( + triton::client::RequestTimers::Kind::REQUEST_START)) + + " to " + + std::to_string(timer.Timestamp( + triton::client::RequestTimers::Kind::REQUEST_END)) + + ".") + : "") + + ((timer.Timestamp(triton::client::RequestTimers::Kind::SEND_START) > + timer.Timestamp(triton::client::RequestTimers::Kind::SEND_END)) + ? (" Send time from " + + std::to_string(timer.Timestamp( + triton::client::RequestTimers::Kind::SEND_START)) + + " to " + + std::to_string(timer.Timestamp( + triton::client::RequestTimers::Kind::SEND_END)) + + ".") + : "") + + ((timer.Timestamp(triton::client::RequestTimers::Kind::RECV_START) > + timer.Timestamp(triton::client::RequestTimers::Kind::RECV_END)) + ? (" Receive time from " + + std::to_string(timer.Timestamp( + triton::client::RequestTimers::Kind::RECV_START)) + + " to " + + std::to_string(timer.Timestamp( + triton::client::RequestTimers::Kind::RECV_END)) + + ".") + : "")); + } + + infer_stat_.completed_request_count++; + infer_stat_.cumulative_total_request_time_ns += request_time_ns; + infer_stat_.cumulative_send_time_ns += send_time_ns; + infer_stat_.cumulative_receive_time_ns += recv_time_ns; + + return Error::Success; +} + +//============================================================================== + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h new file mode 100644 index 000000000..bff2d299f --- /dev/null +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.h @@ -0,0 +1,181 @@ +// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// +// Redistribution and use in source and binary forms, with or without +// modification, are permitted provided that the following conditions +// are met: +// * Redistributions of source code must retain the above copyright +// notice, this list of conditions and the following disclaimer. +// * Redistributions in binary form must reproduce the above copyright +// notice, this list of conditions and the following disclaimer in the +// documentation and/or other materials provided with the distribution. 
+// * Neither the name of NVIDIA CORPORATION nor the names of its +// contributors may be used to endorse or promote products derived +// from this software without specific prior written permission. +// +// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. +#pragma once + +/// \file + +#include +#include + +#include "../client_backend.h" +#include "common.h" +#include "http_client.h" + + +namespace triton { namespace perfanalyzer { namespace clientbackend { +namespace openai { + +class ChatCompletionResult : public InferResult { + public: + ChatCompletionResult( + uint32_t http_code, std::string&& serialized_response, bool is_final, + bool is_null, const std::string& request_id) + : http_code_(http_code), + serialized_response_(std::move(serialized_response)), + is_final_(is_final), is_null_(is_null), request_id_(request_id) + { + } + virtual ~ChatCompletionResult() = default; + + /// Get the id of the request which generated this response. + /// \param id Returns the request id that generated the result. + /// \return Error object indicating success or failure. + Error Id(std::string* id) const override + { + *id = request_id_; + return Error::Success; + } + + + /// Returns the status of the request. + /// \return Error object indicating the success or failure of the + /// request. + Error RequestStatus() const override + { + if ((http_code_ >= 400) && (http_code_ <= 599)) { + return Error( + "OpenAI response returns HTTP code" + std::to_string(http_code_)); + } + return Error::Success; + } + + /// Returns the raw data of the output. + /// \return Error object indicating the success or failure of the + /// request. + Error RawData( + const std::string& output_name, const uint8_t** buf, + size_t* byte_size) const override + { + // [FIXME] disregard "output_name" which is not compatible to + // OpenAI protocol + *buf = reinterpret_cast(serialized_response_.c_str()); + *byte_size = serialized_response_.size(); + return Error::Success; + } + + /// Get final response bool for this response. + /// \return Error object indicating the success or failure. + Error IsFinalResponse(bool* is_final_response) const override + { + *is_final_response = is_final_; + return Error::Success; + }; + + /// Get null response bool for this response. + /// \return Error object indicating the success or failure. 
+ Error IsNullResponse(bool* is_null_response) const override + { + *is_null_response = is_null_; + return Error::Success; + }; + + private: + const uint32_t http_code_{200}; + const std::string serialized_response_; + const bool is_final_{false}; + const bool is_null_{false}; + const std::string request_id_; +}; + + +class ChatCompletionRequest : public HttpRequest { + public: + virtual ~ChatCompletionRequest() {} + ChatCompletionRequest( + std::function&& completion_callback, + std::function&& response_callback, + const std::string& request_id, const bool verbose = false) + : HttpRequest(std::move(completion_callback), verbose), + response_callback_(std::move(response_callback)), + request_id_(request_id) + { + } + void SendResponse(bool is_final, bool is_null); + bool is_stream_{false}; + std::function response_callback_{nullptr}; + // The timers for infer request. + triton::client::RequestTimers timer_; + const std::string request_id_; +}; + +class ChatCompletionClient : public HttpClient { + public: + virtual ~ChatCompletionClient() = default; + + /// Create a client that can be used to communicate with the server. + /// \param server_url The inference server name, port, optional + /// scheme and optional base path in the following format: + /// host:port/. + /// \param verbose If true generate verbose output when contacting + /// the inference server. + /// \param ssl_options Specifies the settings for configuring + /// SSL encryption and authorization. Providing these options + /// do not ensure that SSL/TLS will be used in communication. + /// The use of SSL/TLS depends entirely on the server endpoint. + /// These options will be ignored if the server_url does not + /// expose `https://` scheme. + ChatCompletionClient( + const std::string& server_url, bool verbose = false, + const HttpSslOptions& ssl_options = HttpSslOptions()); + + /// Simplified AsyncInfer() where the request body is expected to be + /// prepared by the caller, the client here is responsible to communicate + /// with a OpenAI-compatible server in both streaming and non-streaming case. + Error AsyncInfer( + std::function callback, + std::string& serialized_request_body, const std::string& request_id); + + const InferStat& ClientInferStat() { return infer_stat_; } + + /// [TODO?] Add AsyncInfer() variant that prepare the request body from + /// function arguments. Similar to Triton client library. 
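For orientation, here is a minimal usage sketch (not part of this patch) of the AsyncInfer() declared above, assuming an OpenAI-compatible server at localhost:9000. The model name, request body, and the assumption that the callback owns and deletes the heap-allocated result are illustrative only; in perf_analyzer the call is made through OpenAiClientBackend.

// Illustrative usage sketch only -- not part of this patch.
#include <future>
#include <iostream>
#include <string>

#include "openai_client.h"

namespace cb = triton::perfanalyzer::clientbackend;

int main()
{
  // This client appends the chat-completions path internally, so only
  // host:port is given here.
  cb::openai::ChatCompletionClient client("localhost:9000");

  // The caller prepares the fully formed request body.
  std::string body =
      R"({"model": "my_model", "messages": [{"role": "user", "content": "hi"}]})";

  std::promise<void> done;
  cb::Error err = client.AsyncInfer(
      [&done](cb::InferResult* result) {
        const uint8_t* buf = nullptr;
        size_t byte_size = 0;
        result->RawData("" /* output name is ignored */, &buf, &byte_size);
        std::cout << std::string(
                         reinterpret_cast<const char*>(buf), byte_size)
                  << std::endl;
        bool is_final = false;
        result->IsFinalResponse(&is_final);
        if (is_final) {
          done.set_value();
        }
        // Assumption: the callback owns the heap-allocated result.
        delete result;
      },
      body, "request-0");
  if (!err.IsOk()) {
    std::cerr << err.Message() << std::endl;
    return 1;
  }
  done.get_future().wait();
  return 0;
}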
+ + private: + // setup curl handle + Error PreRunProcessing(CURL* curl, ChatCompletionRequest* request); + + static size_t ResponseHandler( + void* contents, size_t size, size_t nmemb, void* userp); + static size_t RequestProvider( + void* contents, size_t size, size_t nmemb, void* userp); + static size_t ResponseHeaderHandler( + void* contents, size_t size, size_t nmemb, void* userp); + + Error UpdateInferStat(const triton::client::RequestTimers& timer); + InferStat infer_stat_; +}; + +}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc index d017b8b23..9f62beb29 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc @@ -26,6 +26,8 @@ #include "openai_client_backend.h" +#include "openai_infer_input.h" + namespace triton { namespace perfanalyzer { namespace clientbackend { namespace openai { @@ -44,8 +46,8 @@ OpenAiClientBackend::Create( std::unique_ptr openai_client_backend( new OpenAiClientBackend(http_headers)); - RETURN_IF_CB_ERROR( - HttpClient::Create(&(openai_client_backend->http_client_), url, verbose)); + openai_client_backend->http_client_.reset( + new ChatCompletionClient(url, verbose)); *client_backend = std::move(openai_client_backend); @@ -58,14 +60,14 @@ OpenAiClientBackend::AsyncInfer( const std::vector& inputs, const std::vector& outputs) { - auto wrapped_callback = [callback](cb::openai::InferResult* client_result) { - cb::InferResult* result = new OpenAiInferResult(client_result); - callback(result); - }; + if (inputs.size() != 1) { + return Error("Only expecting one input"); + } + auto raw_input = dynamic_cast(inputs[0]); + raw_input->PrepareForRequest(); RETURN_IF_CB_ERROR(http_client_->AsyncInfer( - wrapped_callback, options, inputs, outputs, *http_headers_)); - + callback, raw_input->DataString(), options.request_id_)); return Error::Success; } @@ -73,25 +75,10 @@ OpenAiClientBackend::AsyncInfer( Error OpenAiClientBackend::ClientInferStat(InferStat* infer_stat) { - // Reusing the common library utilities to collect and report the - // client side statistics. 
- tc::InferStat client_infer_stat; - - RETURN_IF_TRITON_ERROR(http_client_->ClientInferStat(&client_infer_stat)); - - ParseInferStat(client_infer_stat, infer_stat); - + *infer_stat = http_client_->ClientInferStat(); return Error::Success; } -void -OpenAiClientBackend::ParseInferStat( - const tc::InferStat& tfserve_infer_stat, InferStat* infer_stat) -{ - // TODO: Implement - return; -} - //============================================================================== Error @@ -118,35 +105,5 @@ OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(const std::string& name) //============================================================================== -OpenAiInferResult::OpenAiInferResult(cb::openai::InferResult* result) -{ - result_.reset(result); -} - -Error -OpenAiInferResult::Id(std::string* id) const -{ - id->clear(); - return Error::Success; -} - -Error -OpenAiInferResult::RequestStatus() const -{ - RETURN_IF_CB_ERROR(result_->RequestStatus()); - return Error::Success; -} - -Error -OpenAiInferResult::RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const -{ - return Error( - "Output retrieval is not currently supported for OpenAi client backend"); -} - -//============================================================================== - }}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h index c6c83222f..ea9a49a82 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h @@ -29,7 +29,8 @@ #include "../../perf_utils.h" #include "../client_backend.h" -#include "openai_http_client.h" +#include "openai_client.h" +#include "openai_infer_input.h" #define RETURN_IF_TRITON_ERROR(S) \ do { \ @@ -85,7 +86,7 @@ class OpenAiClientBackend : public ClientBackend { void ParseInferStat( const tc::InferStat& openai_infer_stat, InferStat* infer_stat); - std::unique_ptr http_client_; + std::unique_ptr http_client_; std::shared_ptr http_headers_; }; @@ -107,24 +108,4 @@ class OpenAiInferRequestedOutput : public InferRequestedOutput { std::unique_ptr output_; }; -//============================================================== -/// OpenAiInferResult is a wrapper around InferResult object of -/// OpenAi InferResult object. -/// -class OpenAiInferResult : public cb::InferResult { - public: - explicit OpenAiInferResult(cb::openai::InferResult* result); - /// See InferResult::Id() - Error Id(std::string* id) const override; - /// See InferResult::RequestStatus() - Error RequestStatus() const override; - /// See InferResult::RawData() - Error RawData( - const std::string& output_name, const uint8_t** buf, - size_t* byte_size) const override; - - private: - std::unique_ptr result_; -}; - }}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc deleted file mode 100644 index 151eca2a6..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc +++ /dev/null @@ -1,120 +0,0 @@ -// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. 
-// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -#include "openai_http_client.h" - -#include - - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - - -Error -HttpClient::Create( - std::unique_ptr* client, const std::string& server_url, - bool verbose) -{ - client->reset(new HttpClient(server_url, verbose)); - return Error::Success; -} - -Error -HttpClient::AsyncInfer( - OpenAiOnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs, - const Headers& headers) -{ - // TODO FIXME implement - - // TODO FIXME cleanup or remove this. It just proves the json data arrives - rapidjson::Document d{}; - - if (inputs.size() != 1) { - return Error("Only expecting one input"); - } - - auto raw_input = dynamic_cast(inputs[0]); - - raw_input->PrepareForRequest(); - bool end_of_input = false; - const uint8_t* buf; - size_t buf_size; - raw_input->GetNext(&buf, &buf_size, &end_of_input); - if (!end_of_input) { - return Error("Unexpected multiple json data inputs"); - } - if (buf == nullptr) { - return Error("Unexpected null json data"); - } - - std::string json_str(reinterpret_cast(buf), buf_size); - std::cout << "FIXME TODO: JSON data string is " << json_str << std::endl; - - - if (d.Parse(json_str.c_str()).HasParseError()) { - return Error("Unable to parse json string: " + json_str); - } - - // FIXME TKG -- where/how would the 'streaming' option get plugged in? - - // FIXME TKG -- GOOD GOD! Is it this hard to add a single value into a json - // object?? - // FIXME TKG -- what if the user supplied this in the input json file? 
- d.AddMember( - "model", - rapidjson::Value().SetString( - options.model_name_.c_str(), - static_cast(options.model_name_.length()), - d.GetAllocator()), - d.GetAllocator()); - - for (auto itr = d.MemberBegin(); itr != d.MemberEnd(); ++itr) { - std::cout << "FIXME TODO: valid JSON object has key " - << itr->name.GetString() << std::endl; - } - - return Error::Success; -} - -HttpClient::HttpClient(const std::string& url, bool verbose) - : InferenceServerClient(verbose), url_(url) -// ,easy_handle_(reinterpret_cast(curl_easy_init()) // TODO FIXME TKG -{ -} - -HttpClient::~HttpClient() -{ - exiting_ = true; - - // FIXME TODO TKG - // if (easy_handle_ != nullptr) { - // curl_easy_cleanup(reinterpret_cast(easy_handle_)); - //} -} - -}}}} // namespace triton::perfanalyzer::clientbackend::openai \ No newline at end of file diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h deleted file mode 100644 index bbdaddfe9..000000000 --- a/src/c++/perf_analyzer/client_backend/openai/openai_http_client.h +++ /dev/null @@ -1,106 +0,0 @@ -// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. -// -// Redistribution and use in source and binary forms, with or without -// modification, are permitted provided that the following conditions -// are met: -// * Redistributions of source code must retain the above copyright -// notice, this list of conditions and the following disclaimer. -// * Redistributions in binary form must reproduce the above copyright -// notice, this list of conditions and the following disclaimer in the -// documentation and/or other materials provided with the distribution. -// * Neither the name of NVIDIA CORPORATION nor the names of its -// contributors may be used to endorse or promote products derived -// from this software without specific prior written permission. -// -// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -#pragma once - -#include "../client_backend.h" -#include "common.h" -#include "openai_infer_input.h" - - -namespace tc = triton::client; - -namespace triton { namespace perfanalyzer { namespace clientbackend { -namespace openai { - -class InferResult; -class HttpInferRequest; - -using OpenAiOnCompleteFn = std::function; - -//============================================================================== -/// An HttpClient object is used to perform any kind of communication with the -/// OpenAi service using -/// -/// \code -/// std::unique_ptr client; -/// HttpClient::Create(&client, "localhost:8080"); -/// ... -/// ... -/// \endcode -/// -class HttpClient : public tc::InferenceServerClient { - public: - ~HttpClient(); - - /// TODO: Adjust as needed - /// Create a client that can be used to communicate with the server. 
- /// \param client Returns a new InferenceServerHttpClient object. - /// \param server_url The inference server name and port. - /// \param verbose If true generate verbose output when contacting - /// the inference server. - /// \return Error object indicating success or failure. - static Error Create( - std::unique_ptr* client, const std::string& server_url, - const bool verbose); - - /// TODO FIXME: Update - /// Run asynchronous inference on server. - Error AsyncInfer( - OpenAiOnCompleteFn callback, const InferOptions& options, - const std::vector& inputs, - const std::vector& outputs = - std::vector(), - const Headers& headers = Headers()); - - private: - HttpClient(const std::string& url, bool verbose); - - // The server url - const std::string url_; -}; - -//====================================================================== - -class InferResult { - public: - static Error Create( - InferResult** infer_result, - std::shared_ptr infer_request); - Error RequestStatus() const { return Error::Success; } // TODO FIXME TKG - Error Id(std::string* id) const { return Error::Success; } // TODO FIXME TKG - - private: - InferResult(std::shared_ptr infer_request); - - // The status of the inference - Error status_; - // The pointer to the HttpInferRequest object - std::shared_ptr infer_request_; -}; - -//====================================================================== - -}}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc index 70d827e85..834e27788 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc @@ -51,9 +51,10 @@ OpenAiInferInput::SetShape(const std::vector& shape) Error OpenAiInferInput::Reset() { + data_str_.clear(); + bufs_.clear(); buf_byte_sizes_.clear(); - bufs_idx_ = 0; byte_size_ = 0; return Error::Success; } @@ -61,18 +62,12 @@ OpenAiInferInput::Reset() Error OpenAiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) { + data_str_.clear(); + byte_size_ += input_byte_size; bufs_.push_back(input); buf_byte_sizes_.push_back(input_byte_size); - - return Error::Success; -} - -Error -OpenAiInferInput::ByteSize(size_t* byte_size) const -{ - *byte_size = byte_size_; return Error::Success; } @@ -80,32 +75,19 @@ Error OpenAiInferInput::PrepareForRequest() { // Reset position so request sends entire input. 
- bufs_idx_ = 0; - buf_pos_ = 0; - return Error::Success; -} - -Error -OpenAiInferInput::GetNext( - const uint8_t** buf, size_t* input_bytes, bool* end_of_input) -{ - if (bufs_idx_ < bufs_.size()) { - *buf = bufs_[bufs_idx_]; - *input_bytes = buf_byte_sizes_[bufs_idx_]; - bufs_idx_++; - } else { - *buf = nullptr; - *input_bytes = 0; + if (data_str_.empty() && (byte_size_ != 0)) { + for (size_t i = 0; i < bufs_.size(); ++i) { + data_str_.append( + reinterpret_cast(bufs_[i]), buf_byte_sizes_[i]); + } } - *end_of_input = (bufs_idx_ >= bufs_.size()); - return Error::Success; } OpenAiInferInput::OpenAiInferInput( const std::string& name, const std::vector& dims, const std::string& datatype) - : InferInput(BackendKind::TENSORFLOW_SERVING, name, datatype), shape_(dims) + : InferInput(BackendKind::OPENAI, name, datatype), shape_(dims) { } diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h index a10b9312f..0c192cfad 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h @@ -51,14 +51,11 @@ class OpenAiInferInput : public InferInput { Error Reset() override; /// See InferInput::AppendRaw() Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; - /// Gets the size of data added into this input in bytes. - /// \param byte_size The size of data added in bytes. - /// \return Error object indicating success or failure. - Error ByteSize(size_t* byte_size) const; - /// Resets the heads to start providing data from the beginning. + /// Prepare the input to be in the form expected by an OpenAI client, + /// must call before accessing the data. Error PrepareForRequest(); - /// Get the next chunk of data if available. - Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input); + /// Get the contiguous data in string. 
+ std::string& DataString() { return data_str_; } private: explicit OpenAiInferInput( @@ -68,9 +65,9 @@ class OpenAiInferInput : public InferInput { std::vector shape_; size_t byte_size_{0}; - size_t bufs_idx_, buf_pos_; std::vector bufs_; std::vector buf_byte_sizes_; + std::string data_str_; }; }}}} // namespace triton::perfanalyzer::clientbackend::openai From 4ab2fc11c1d002239ca66ad7c257da27f5745d4d Mon Sep 17 00:00:00 2001 From: tgerdes Date: Mon, 4 Mar 2024 10:40:00 -0600 Subject: [PATCH 08/23] Pass endpoint to openai client --- .../client_backend/client_backend.cc | 20 +++++++++---------- .../client_backend/client_backend.h | 15 +++++++++----- .../client_backend/openai/openai_client.cc | 6 +++--- .../client_backend/openai/openai_client.h | 6 ++++-- .../openai/openai_client_backend.cc | 8 ++++---- .../openai/openai_client_backend.h | 7 ++++--- src/c++/perf_analyzer/perf_analyzer.cc | 13 ++++++------ 7 files changed, 42 insertions(+), 33 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc index 04a68fefb..c665390bb 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/client_backend.cc @@ -116,8 +116,8 @@ BackendToGrpcType(const GrpcCompressionAlgorithm compression_algorithm) // Error ClientBackendFactory::Create( - const BackendKind kind, const std::string& url, const ProtocolType protocol, - const SslOptionsBase& ssl_options, + const BackendKind kind, const std::string& url, const std::string& endpoint, + const ProtocolType protocol, const SslOptionsBase& ssl_options, const std::map> trace_options, const GrpcCompressionAlgorithm compression_algorithm, std::shared_ptr http_headers, @@ -128,9 +128,10 @@ ClientBackendFactory::Create( std::shared_ptr* factory) { factory->reset(new ClientBackendFactory( - kind, url, protocol, ssl_options, trace_options, compression_algorithm, - http_headers, triton_server_path, model_repository_path, verbose, - metrics_url, input_tensor_format, output_tensor_format)); + kind, url, endpoint, protocol, ssl_options, trace_options, + compression_algorithm, http_headers, triton_server_path, + model_repository_path, verbose, metrics_url, input_tensor_format, + output_tensor_format)); return Error::Success; } @@ -139,7 +140,7 @@ ClientBackendFactory::CreateClientBackend( std::unique_ptr* client_backend) { RETURN_IF_CB_ERROR(ClientBackend::Create( - kind_, url_, protocol_, ssl_options_, trace_options_, + kind_, url_, endpoint_, protocol_, ssl_options_, trace_options_, compression_algorithm_, http_headers_, verbose_, triton_server_path, model_repository_path_, metrics_url_, input_tensor_format_, output_tensor_format_, client_backend)); @@ -157,8 +158,8 @@ ClientBackendFactory::Kind() // Error ClientBackend::Create( - const BackendKind kind, const std::string& url, const ProtocolType protocol, - const SslOptionsBase& ssl_options, + const BackendKind kind, const std::string& url, const std::string& endpoint, + const ProtocolType protocol, const SslOptionsBase& ssl_options, const std::map> trace_options, const GrpcCompressionAlgorithm compression_algorithm, std::shared_ptr http_headers, const bool verbose, @@ -177,10 +178,9 @@ ClientBackend::Create( &local_backend)); } #ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI - // TODO -- I think this needs endpoint to be passed in? 
else if (kind == OPENAI) { RETURN_IF_CB_ERROR(openai::OpenAiClientBackend::Create( - url, protocol, http_headers, verbose, &local_backend)); + url, endpoint, protocol, http_headers, verbose, &local_backend)); } #endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS diff --git a/src/c++/perf_analyzer/client_backend/client_backend.h b/src/c++/perf_analyzer/client_backend/client_backend.h index 487c215ce..3d1f3e89c 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.h +++ b/src/c++/perf_analyzer/client_backend/client_backend.h @@ -268,6 +268,7 @@ class ClientBackendFactory { /// Create a factory that can be used to construct Client Backends. /// \param kind The kind of client backend to create. /// \param url The inference server url and port. + /// \param endpoint The endpoint on the inference server to send requests to /// \param protocol The protocol type used. /// \param ssl_options The SSL options used with client backend. /// \param compression_algorithm The compression algorithm to be used @@ -290,7 +291,8 @@ class ClientBackendFactory { /// \return Error object indicating success or failure. static Error Create( const BackendKind kind, const std::string& url, - const ProtocolType protocol, const SslOptionsBase& ssl_options, + const std::string& endpoint, const ProtocolType protocol, + const SslOptionsBase& ssl_options, const std::map> trace_options, const GrpcCompressionAlgorithm compression_algorithm, std::shared_ptr http_headers, @@ -309,7 +311,8 @@ class ClientBackendFactory { private: ClientBackendFactory( const BackendKind kind, const std::string& url, - const ProtocolType protocol, const SslOptionsBase& ssl_options, + const std::string& endpoint, const ProtocolType protocol, + const SslOptionsBase& ssl_options, const std::map> trace_options, const GrpcCompressionAlgorithm compression_algorithm, const std::shared_ptr http_headers, @@ -317,8 +320,8 @@ class ClientBackendFactory { const std::string& model_repository_path, const bool verbose, const std::string& metrics_url, const TensorFormat input_tensor_format, const TensorFormat output_tensor_format) - : kind_(kind), url_(url), protocol_(protocol), ssl_options_(ssl_options), - trace_options_(trace_options), + : kind_(kind), url_(url), endpoint_(endpoint), protocol_(protocol), + ssl_options_(ssl_options), trace_options_(trace_options), compression_algorithm_(compression_algorithm), http_headers_(http_headers), triton_server_path(triton_server_path), model_repository_path_(model_repository_path), verbose_(verbose), @@ -329,6 +332,7 @@ class ClientBackendFactory { const BackendKind kind_; const std::string url_; + const std::string endpoint_; const ProtocolType protocol_; const SslOptionsBase& ssl_options_; const std::map> trace_options_; @@ -361,7 +365,8 @@ class ClientBackend { public: static Error Create( const BackendKind kind, const std::string& url, - const ProtocolType protocol, const SslOptionsBase& ssl_options, + const std::string& endpoint, const ProtocolType protocol, + const SslOptionsBase& ssl_options, const std::map> trace_options, const GrpcCompressionAlgorithm compression_algorithm, std::shared_ptr http_headers, const bool verbose, diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc index f83c3976b..1bab51bd6 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -68,9 +68,9 @@ 
ChatCompletionRequest::SendResponse(bool is_final, bool is_null) } ChatCompletionClient::ChatCompletionClient( - const std::string& url, bool verbose, const HttpSslOptions& ssl_options) - : HttpClient( - std::string(url + "/v1/chat/completions"), verbose, ssl_options) + const std::string& url, const std::string& endpoint, bool verbose, + const HttpSslOptions& ssl_options) + : HttpClient(std::string(url + "/" + endpoint), verbose, ssl_options) { } diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h index bff2d299f..5ede83143 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.h @@ -67,7 +67,7 @@ class ChatCompletionResult : public InferResult { { if ((http_code_ >= 400) && (http_code_ <= 599)) { return Error( - "OpenAI response returns HTTP code" + std::to_string(http_code_)); + "OpenAI response returns HTTP code " + std::to_string(http_code_)); } return Error::Success; } @@ -139,6 +139,7 @@ class ChatCompletionClient : public HttpClient { /// \param server_url The inference server name, port, optional /// scheme and optional base path in the following format: /// host:port/. + /// \param endpoint The name of the endpoint to send requests to /// \param verbose If true generate verbose output when contacting /// the inference server. /// \param ssl_options Specifies the settings for configuring @@ -148,7 +149,8 @@ class ChatCompletionClient : public HttpClient { /// These options will be ignored if the server_url does not /// expose `https://` scheme. ChatCompletionClient( - const std::string& server_url, bool verbose = false, + const std::string& server_url, const std::string& endpoint, + bool verbose = false, const HttpSslOptions& ssl_options = HttpSslOptions()); /// Simplified AsyncInfer() where the request body is expected to be diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc index 9f62beb29..bff94fc70 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc @@ -35,9 +35,9 @@ namespace openai { Error OpenAiClientBackend::Create( - const std::string& url, const ProtocolType protocol, - std::shared_ptr http_headers, const bool verbose, - std::unique_ptr* client_backend) + const std::string& url, const std::string& endpoint, + const ProtocolType protocol, std::shared_ptr http_headers, + const bool verbose, std::unique_ptr* client_backend) { if (protocol == ProtocolType::GRPC) { return Error( @@ -47,7 +47,7 @@ OpenAiClientBackend::Create( new OpenAiClientBackend(http_headers)); openai_client_backend->http_client_.reset( - new ChatCompletionClient(url, verbose)); + new ChatCompletionClient(url, endpoint, verbose)); *client_backend = std::move(openai_client_backend); diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h index ea9a49a82..94dbd9729 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h @@ -56,6 +56,7 @@ class OpenAiClientBackend : public ClientBackend { /// Create an OpenAI client backend which can be used to interact with the /// server. /// \param url The inference server url and port. 
+ /// \param endpoint The endpoint on the inference server to send requests to /// \param protocol The protocol type used. /// \param http_headers Map of HTTP headers. The map key/value indicates /// the header name/value. @@ -64,9 +65,9 @@ class OpenAiClientBackend : public ClientBackend { /// object. /// \return Error object indicating success or failure. static Error Create( - const std::string& url, const ProtocolType protocol, - std::shared_ptr http_headers, const bool verbose, - std::unique_ptr* client_backend); + const std::string& url, const std::string& endpoint, + const ProtocolType protocol, std::shared_ptr http_headers, + const bool verbose, std::unique_ptr* client_backend); /// See ClientBackend::AsyncInfer() Error AsyncInfer( diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index a1a5ab635..1928772fb 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -77,12 +77,13 @@ PerfAnalyzer::CreateAnalyzerObjects() std::shared_ptr factory; FAIL_IF_ERR( cb::ClientBackendFactory::Create( - params_->kind, params_->url, params_->protocol, params_->ssl_options, - params_->trace_options, params_->compression_algorithm, - params_->http_headers, params_->triton_server_path, - params_->model_repository_path, params_->extra_verbose, - params_->metrics_url, params_->input_tensor_format, - params_->output_tensor_format, &factory), + params_->kind, params_->url, params_->endpoint, params_->protocol, + params_->ssl_options, params_->trace_options, + params_->compression_algorithm, params_->http_headers, + params_->triton_server_path, params_->model_repository_path, + params_->extra_verbose, params_->metrics_url, + params_->input_tensor_format, params_->output_tensor_format, + &factory), "failed to create client factory"); FAIL_IF_ERR( From ffbf1541b252c179c85420d6c2f47ded17787e93 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Mon, 4 Mar 2024 11:16:19 -0600 Subject: [PATCH 09/23] Resolve fixmes --- src/c++/library/http_client.h | 2 +- .../client_backend/openai/http_client.h | 17 +---------------- .../client_backend/openai/openai_client.cc | 1 - .../client_backend/openai/openai_client.h | 7 ++----- src/c++/perf_analyzer/perf_utils.cc | 4 ---- 5 files changed, 4 insertions(+), 27 deletions(-) diff --git a/src/c++/library/http_client.h b/src/c++/library/http_client.h index d252b40f1..532ea10fb 100644 --- a/src/c++/library/http_client.h +++ b/src/c++/library/http_client.h @@ -49,7 +49,7 @@ struct HttpSslOptions { enum KEYTYPE { KEY_PEM = 0, KEY_DER = 1 - // TODO: Support loading private key from crypto engine + // TODO TMA-1645: Support loading private key from crypto engine // KEY_ENG = 2 }; explicit HttpSslOptions() diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.h b/src/c++/perf_analyzer/client_backend/openai/http_client.h index c6acfd524..13a0d2e05 100644 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.h @@ -35,19 +35,12 @@ #include #include -// [TODO] Below should already be a generic class for any HTTP use, -// relocate it so that it can be used elsewhere namespace triton { namespace perfanalyzer { namespace clientbackend { namespace openai { -// [FIXME] add back "parameter" handling -// [FIXME] add back "compression" handling - /// The key-value map type to be included in the request /// as custom headers. typedef std::map Headers; -/// The key-value map type to be included as URL parameters. 
-typedef std::map Parameters; // The options for authorizing and authenticating SSL/TLS connections. struct HttpSslOptions { @@ -55,7 +48,7 @@ struct HttpSslOptions { enum KEYTYPE { KEY_PEM = 0, KEY_DER = 1 - // TODO: Support loading private key from crypto engine + // TODO TMA-1645: Support loading private key from crypto engine // KEY_ENG = 2 }; explicit HttpSslOptions() @@ -115,11 +108,6 @@ class HttpRequest { // actual amount copied in 'input_bytes'. void GetNextInput(uint8_t* buf, size_t size, size_t* input_bytes); - // [FIXME] define default callback like - // CURLOPT_READFUNCTION, CURLOPT_WRITEFUNCTION here? - // the specialized HttpRequest can override the callbacks when read / write - // schema has changed. - // Buffer that accumulates the response body. std::string response_buffer_; @@ -158,8 +146,6 @@ class HttpClient { // Note that this function does not block void Send(CURL* handle, std::unique_ptr&& request); - // [FIXME] provide more helper functions to encapsulate CURL detail - protected: void AsyncTransfer(); @@ -184,7 +170,6 @@ class HttpClient { bool verbose_; private: - // [FIXME] should belong to SSL option struct as helper function const std::string& ParseSslKeyType(HttpSslOptions::KEYTYPE key_type); const std::string& ParseSslCertType(HttpSslOptions::CERTTYPE cert_type); }; diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc index 1bab51bd6..c7502657a 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -131,7 +131,6 @@ ChatCompletionClient::ResponseHandler( // Send response now if streaming, otherwise wait until request has been // completed if (request->is_stream_) { - // [FIXME] assume it is proper chunked of response auto done_signal = (request->response_buffer_.find("data: [DONE]") != std::string::npos); request->SendResponse( diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h index 5ede83143..a3b1853e3 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.h @@ -79,8 +79,8 @@ class ChatCompletionResult : public InferResult { const std::string& output_name, const uint8_t** buf, size_t* byte_size) const override { - // [FIXME] disregard "output_name" which is not compatible to - // OpenAI protocol + // There is only a single output (and it has no defined name), so we can + // disregard output_name *buf = reinterpret_cast(serialized_response_.c_str()); *byte_size = serialized_response_.size(); return Error::Success; @@ -162,9 +162,6 @@ class ChatCompletionClient : public HttpClient { const InferStat& ClientInferStat() { return infer_stat_; } - /// [TODO?] Add AsyncInfer() variant that prepare the request body from - /// function arguments. Similar to Triton client library. - private: // setup curl handle Error PreRunProcessing(CURL* curl, ChatCompletionRequest* request); diff --git a/src/c++/perf_analyzer/perf_utils.cc b/src/c++/perf_analyzer/perf_utils.cc index 4c02f56ca..144eaa48a 100644 --- a/src/c++/perf_analyzer/perf_utils.cc +++ b/src/c++/perf_analyzer/perf_utils.cc @@ -212,10 +212,6 @@ SerializeExplicitTensor( std::string element = buffer.GetString(); uint32_t len = element.size(); - // FIXME TODO - for BYTES we add the length. Is there any reason that - // would be needed here? 
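// Illustrative, self-contained sketch of the JSON input path shown above,
// i.e. how a single rapidjson value from --input-data becomes the string that
// is appended to the OpenAI request body. The function name is hypothetical
// and not part of the patch; it assumes rapidjson is available.
#include <rapidjson/document.h>
#include <rapidjson/stringbuffer.h>
#include <rapidjson/writer.h>

#include <string>

std::string SerializeJsonElement(const rapidjson::Value& value)
{
  rapidjson::StringBuffer buffer;
  rapidjson::Writer<rapidjson::StringBuffer> writer(buffer);
  value.Accept(writer);       // serialize the JSON object into 'buffer'
  return buffer.GetString();  // e.g. {"model":"gpt","messages":[...]}
}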
- // serialized.append(reinterpret_cast(&len), - // sizeof(uint32_t)); serialized.append(element); } std::copy( From 4a8684ed79a3ef977275c977ddb13941694a0678 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Mon, 4 Mar 2024 11:36:34 -0600 Subject: [PATCH 10/23] update copyright years --- src/c++/perf_analyzer/client_backend/client_backend.cc | 2 +- src/c++/perf_analyzer/command_line_parser.cc | 2 +- src/c++/perf_analyzer/command_line_parser.h | 2 +- src/c++/perf_analyzer/model_parser.cc | 2 +- src/c++/perf_analyzer/model_parser.h | 2 +- src/c++/perf_analyzer/perf_analyzer.cc | 2 +- src/c++/perf_analyzer/perf_utils.cc | 2 +- 7 files changed, 7 insertions(+), 7 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc index c665390bb..01585281b 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/client_backend.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 9bcc5d46f..18af4d994 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -1,4 +1,4 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/command_line_parser.h b/src/c++/perf_analyzer/command_line_parser.h index 79d387811..cbd807eb4 100644 --- a/src/c++/perf_analyzer/command_line_parser.h +++ b/src/c++/perf_analyzer/command_line_parser.h @@ -1,4 +1,4 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/model_parser.cc b/src/c++/perf_analyzer/model_parser.cc index 30e149c0c..1ab9f7a6d 100644 --- a/src/c++/perf_analyzer/model_parser.cc +++ b/src/c++/perf_analyzer/model_parser.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/model_parser.h b/src/c++/perf_analyzer/model_parser.h index c1e16bac7..c1400d079 100644 --- a/src/c++/perf_analyzer/model_parser.h +++ b/src/c++/perf_analyzer/model_parser.h @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/perf_analyzer.cc b/src/c++/perf_analyzer/perf_analyzer.cc index 1928772fb..ced5fc991 100644 --- a/src/c++/perf_analyzer/perf_analyzer.cc +++ b/src/c++/perf_analyzer/perf_analyzer.cc @@ -1,4 +1,4 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/perf_utils.cc b/src/c++/perf_analyzer/perf_utils.cc index 144eaa48a..445dd7c54 100644 --- a/src/c++/perf_analyzer/perf_utils.cc +++ b/src/c++/perf_analyzer/perf_utils.cc @@ -1,4 +1,4 @@ -// Copyright 2020-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions From 9e8533e479800f557cc434cdade17624221e06d8 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Mon, 4 Mar 2024 12:30:14 -0600 Subject: [PATCH 11/23] more cleanup --- src/c++/library/http_client.h | 2 +- src/c++/perf_analyzer/client_backend/openai/http_client.cc | 2 +- src/c++/perf_analyzer/test_command_line_parser.cc | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/src/c++/library/http_client.h b/src/c++/library/http_client.h index 532ea10fb..3a94f3fde 100644 --- a/src/c++/library/http_client.h +++ b/src/c++/library/http_client.h @@ -1,4 +1,4 @@ -// Copyright 2020-2022, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. // // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.cc b/src/c++/perf_analyzer/client_backend/openai/http_client.cc index 4c8632c52..ff636388b 100644 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.cc @@ -226,7 +226,7 @@ HttpClient::AsyncTransfer() continue; } - long http_code = 400; + uint32_t http_code = 400; if (msg->data.result == CURLE_OK) { curl_easy_getinfo( msg->easy_handle, CURLINFO_RESPONSE_CODE, &http_code); diff --git a/src/c++/perf_analyzer/test_command_line_parser.cc b/src/c++/perf_analyzer/test_command_line_parser.cc index 6428a0f2f..2527d2b1b 100644 --- a/src/c++/perf_analyzer/test_command_line_parser.cc +++ b/src/c++/perf_analyzer/test_command_line_parser.cc @@ -1,4 +1,4 @@ -// Copyright 2022-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +// Copyright 2022-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
// // Redistribution and use in source and binary forms, with or without // modification, are permitted provided that the following conditions From 381184873095234448b27d305967d1d92f942aa2 Mon Sep 17 00:00:00 2001 From: Timothy Gerdes <50968584+tgerdesnv@users.noreply.github.com> Date: Tue, 5 Mar 2024 09:21:29 -0600 Subject: [PATCH 12/23] Update src/c++/perf_analyzer/command_line_parser.cc Co-authored-by: dyastremsky <58150256+dyastremsky@users.noreply.github.com> --- src/c++/perf_analyzer/command_line_parser.cc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 18af4d994..1154fc212 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -1922,7 +1922,7 @@ CLParser::VerifyOptions() if (params_->endpoint.empty()) { Usage( "Must supply --endpoint for OpenAI service kind. For example, " - "\"v1/chat/completions\""); + "\"v1/chat/completions\"."); } } From 305e96cc48e8e3fc5ea1e9e748e64b13fefc7168 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 09:25:58 -0600 Subject: [PATCH 13/23] remove 'file' from top of files --- src/c++/library/common.h | 2 -- src/c++/library/grpc_client.h | 2 -- src/c++/library/http_client.h | 2 -- src/c++/perf_analyzer/client_backend/openai/openai_client.h | 2 -- 4 files changed, 8 deletions(-) diff --git a/src/c++/library/common.h b/src/c++/library/common.h index 9cf99c478..133a32143 100644 --- a/src/c++/library/common.h +++ b/src/c++/library/common.h @@ -25,8 +25,6 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #pragma once -/// \file - #include #include #include diff --git a/src/c++/library/grpc_client.h b/src/c++/library/grpc_client.h index cc90b12de..7609c10b7 100644 --- a/src/c++/library/grpc_client.h +++ b/src/c++/library/grpc_client.h @@ -25,8 +25,6 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #pragma once -/// \file - #include #include diff --git a/src/c++/library/http_client.h b/src/c++/library/http_client.h index 3a94f3fde..e06b2eef3 100644 --- a/src/c++/library/http_client.h +++ b/src/c++/library/http_client.h @@ -25,8 +25,6 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #pragma once -/// \file - #include #include diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.h b/src/c++/perf_analyzer/client_backend/openai/openai_client.h index a3b1853e3..db58520d1 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.h @@ -25,8 +25,6 @@ // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
#pragma once -/// \file - #include #include From dd4c4ca84154ec45149e13ddffc3146bb3428b6c Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 10:00:34 -0600 Subject: [PATCH 14/23] clean up help message and add endpoint to help --- .../openai/openai_client_backend.cc | 2 +- .../openai/openai_infer_input.h | 4 +- src/c++/perf_analyzer/command_line_parser.cc | 67 ++++++++++++------- 3 files changed, 44 insertions(+), 29 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc index bff94fc70..1296a519c 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc @@ -67,7 +67,7 @@ OpenAiClientBackend::AsyncInfer( auto raw_input = dynamic_cast(inputs[0]); raw_input->PrepareForRequest(); RETURN_IF_CB_ERROR(http_client_->AsyncInfer( - callback, raw_input->DataString(), options.request_id_)); + callback, raw_input->GetRequestBody(), options.request_id_)); return Error::Success; } diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h index 0c192cfad..f5fd5ea42 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h @@ -54,8 +54,8 @@ class OpenAiInferInput : public InferInput { /// Prepare the input to be in the form expected by an OpenAI client, /// must call before accessing the data. Error PrepareForRequest(); - /// Get the contiguous data in string. - std::string& DataString() { return data_str_; } + /// Get the contiguous request body string + std::string& GetRequestBody() { return data_str_; } private: explicit OpenAiInferInput( diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 1154fc212..a74df4e25 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -98,13 +98,15 @@ CLParser::Usage(const std::string& msg) std::cerr << "Usage: " << argv_[0] << " [options]" << std::endl; std::cerr << "==== SYNOPSIS ====\n \n"; std::cerr << "\t--version " << std::endl; - std::cerr << "\t--service-kind " - "<\"triton\"|\"tfserving\"|\"torchserve\"|\"triton_c_api\">" - << std::endl; std::cerr << "\t-m " << std::endl; std::cerr << "\t-x " << std::endl; - std::cerr << "\t--bls-composing-models=" << std::endl; + std::cerr << "\t--bls-composing-models " << std::endl; std::cerr << "\t--model-signature-name " << std::endl; + std::cerr + << "\t--service-kind " + "<\"triton\"|\"openai\"|\"tfserving\"|\"torchserve\"|\"triton_c_api\">" + << std::endl; + std::cerr << "\t--endpoint " << std::endl; std::cerr << "\t-v" << std::endl; std::cerr << std::endl; std::cerr << "I. 
MEASUREMENT PARAMETERS: " << std::endl; @@ -151,8 +153,8 @@ CLParser::Usage(const std::string& msg) std::cerr << "\t--sequence-id-range " << std::endl; std::cerr << "\t--string-length " << std::endl; std::cerr << "\t--string-data " << std::endl; - std::cerr << "\t--input-tensor-format=[binary|json]" << std::endl; - std::cerr << "\t--output-tensor-format=[binary|json]" << std::endl; + std::cerr << "\t--input-tensor-format [binary|json]" << std::endl; + std::cerr << "\t--output-tensor-format [binary|json]" << std::endl; std::cerr << "\tDEPRECATED OPTIONS" << std::endl; std::cerr << "\t-z" << std::endl; std::cerr << "\t--data-directory " << std::endl; @@ -196,21 +198,6 @@ CLParser::Usage(const std::string& msg) 18) << std::endl; - std::cerr - << FormatMessage( - " --service-kind: Describes the kind of service perf_analyzer to " - "generate load for. The options are \"triton\", \"triton_c_api\", " - "\"tfserving\" and \"torchserve\". Default value is \"triton\". " - "Note in order to use \"torchserve\" backend --input-data option " - "must point to a json file holding data in the following format " - "{\"data\" : [{\"TORCHSERVE_INPUT\" : [\"\"]}, {...}...]}. The type of file here will depend " - "on the model. In order to use \"triton_c_api\" you must specify " - "the Triton server install path and the model repository path via " - "the --triton-server-directory and --model-repository flags", - 18) - << std::endl; - std::cerr << std::setw(9) << std::left << " -m: " << FormatMessage( @@ -232,6 +219,33 @@ CLParser::Usage(const std::string& msg) "\"tfserving\".", 18) << std::endl; + + std::cerr + << FormatMessage( + " --service-kind: Describes the kind of service perf_analyzer to " + "generate load for. The options are \"triton\", \"openai\", " + "\"triton_c_api\", \"tfserving\" and \"torchserve\". Default " + "value is \"triton\". Note in order to use \"openai\" you must " + "specify an endpoint via --endpoint. " + "Note in order to use \"torchserve\" backend --input-data option " + "must point to a json file holding data in the following format " + "{\"data\" : [{\"TORCHSERVE_INPUT\" : [\"\"]}, {...}...]}. The type of file here will depend " + "on the model. In order to use \"triton_c_api\" you must specify " + "the Triton server install path and the model repository path via " + "the --triton-server-directory and --model-repository flags", + 18) + << std::endl; + + std::cerr + << FormatMessage( + " --endpoint: Describes what endpoint to send requests to on the " + "server. This is required when using \"openai\" service-kind, and " + "is ignored for all other cases. Currently only " + "\"v1/chat/completions\" is confirmed to work.", + 18) + << std::endl; + std::cerr << std::setw(9) << std::left << " -v: " << FormatMessage("Enables verbose mode.", 9) << std::endl; @@ -303,7 +317,7 @@ CLParser::Usage(const std::string& msg) << std::endl; std::cerr << FormatMessage( - "--periodic-concurrency-range : Determines the " + " --periodic-concurrency-range : Determines the " "range of concurrency levels in the similar but slightly " "different manner as the --concurrency-range. Perf Analyzer will " "start from the concurrency level of 'start' and increase by " @@ -323,7 +337,7 @@ CLParser::Usage(const std::string& msg) << std::endl; std::cerr << FormatMessage( - "--request-period : Indicates the number of responses that " + " --request-period : Indicates the number of responses that " "each request must receive before new, concurrent requests are " "sent when --periodic-concurrency-range is specified. 
Default " "value is 10.", @@ -331,7 +345,7 @@ CLParser::Usage(const std::string& msg) << std::endl; std::cerr << FormatMessage( - "--request-parameter : Specifies a custom " + " --request-parameter : Specifies a custom " "parameter that can be sent to a Triton backend as part of the " "request. For example, providing '--request-parameter " "max_tokens:256:int' to the command line will set an additional " @@ -382,7 +396,7 @@ CLParser::Usage(const std::string& msg) << std::endl; std::cerr << FormatMessage( - "--binary-search: Enables the binary search on the specified " + " --binary-search: Enables the binary search on the specified " "search range. This option requires 'start' and 'end' to be " "expilicitly specified in the --concurrency-range or " "--request-rate-range. When using this option, 'step' is more " @@ -393,7 +407,7 @@ CLParser::Usage(const std::string& msg) << std::endl; std::cerr << FormatMessage( - "--num-of-sequences: Sets the number of concurrent " + " --num-of-sequences: Sets the number of concurrent " "sequences for sequence models. This option is ignored when " "--request-rate-range is not specified. By default, its " "value is 4.", @@ -1613,6 +1627,7 @@ CLParser::ParseCommandLine(int argc, char** argv) } case 62: { params_->endpoint = optarg; + break; } case 'v': params_->extra_verbose = params_->verbose; From cac8bff625bb3cec4423130d70bb70c124f559f0 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 10:58:54 -0600 Subject: [PATCH 15/23] Fix client stats --- .../perf_analyzer/client_backend/openai/openai_client.cc | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc index c7502657a..ccd23e6ff 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -158,12 +158,12 @@ ChatCompletionClient::AsyncInfer( auto completion_callback = [this](HttpRequest* req) { auto request = static_cast(req); - if (!request->is_stream_) { - request->SendResponse(true /* is_final */, false /* is_null */); - } request->timer_.CaptureTimestamp( triton::client::RequestTimers::Kind::REQUEST_END); UpdateInferStat(request->timer_); + if (!request->is_stream_) { + request->SendResponse(true /* is_final */, false /* is_null */); + } }; std::unique_ptr request(new ChatCompletionRequest( std::move(completion_callback), std::move(callback), request_id, @@ -185,6 +185,8 @@ ChatCompletionClient::AsyncInfer( raw_request->timer_.CaptureTimestamp( triton::client::RequestTimers::Kind::SEND_START); Send(multi_easy_handle, std::move(request)); + raw_request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::SEND_END); return Error::Success; } From 5b7434ae3fc3c72dfa4c42401961c381be548e6c Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 11:46:03 -0600 Subject: [PATCH 16/23] remove unused fn --- .../client_backend/openai/openai_client_backend.h | 3 --- 1 file changed, 3 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h index 94dbd9729..f6c6490c6 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client_backend.h @@ -84,9 +84,6 @@ class OpenAiClientBackend : public ClientBackend { { } - void ParseInferStat( - const tc::InferStat& openai_infer_stat, 
InferStat* infer_stat); - std::unique_ptr http_client_; std::shared_ptr http_headers_; }; From 5eab7b53ead585e0ea96f91d52ec57b25d538240 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 11:58:23 -0600 Subject: [PATCH 17/23] Assert on json input format --- src/c++/perf_analyzer/perf_utils.cc | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) diff --git a/src/c++/perf_analyzer/perf_utils.cc b/src/c++/perf_analyzer/perf_utils.cc index 445dd7c54..6088c1b6b 100644 --- a/src/c++/perf_analyzer/perf_utils.cc +++ b/src/c++/perf_analyzer/perf_utils.cc @@ -205,7 +205,13 @@ SerializeExplicitTensor( } else if (dt.compare("JSON") == 0) { std::string serialized = ""; - for (const auto& value : tensor.GetArray()) { + auto values = tensor.GetArray(); + if (values.Size() != 1) { + return cb::Error( + "JSON format does not yet support multiple json objects in the " + "input"); + } + for (const auto& value : values) { rapidjson::StringBuffer buffer; rapidjson::Writer writer(buffer); value.Accept(writer); From 603631a9a48081df7fedcbaae4faee6462a164d3 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 12:41:41 -0600 Subject: [PATCH 18/23] Use a single SEND_END point --- .../perf_analyzer/client_backend/openai/openai_client.cc | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc index ccd23e6ff..362278436 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -84,10 +84,8 @@ ChatCompletionClient::RequestProvider( request->GetNextInput( reinterpret_cast(contents), size * nmemb, &input_bytes); - if (input_bytes == 0) { - request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::SEND_END); - } + request->timer_.CaptureTimestamp( + triton::client::RequestTimers::Kind::SEND_END); return input_bytes; } @@ -185,8 +183,6 @@ ChatCompletionClient::AsyncInfer( raw_request->timer_.CaptureTimestamp( triton::client::RequestTimers::Kind::SEND_START); Send(multi_easy_handle, std::move(request)); - raw_request->timer_.CaptureTimestamp( - triton::client::RequestTimers::Kind::SEND_END); return Error::Success; } From 40d64654ca125d912491cd92c38dc01ea113b062 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Tue, 5 Mar 2024 13:31:56 -0600 Subject: [PATCH 19/23] Add sync assert. Add OPENAI to helper fn --- src/c++/perf_analyzer/client_backend/client_backend.cc | 3 +++ src/c++/perf_analyzer/command_line_parser.cc | 3 +++ 2 files changed, 6 insertions(+) diff --git a/src/c++/perf_analyzer/client_backend/client_backend.cc b/src/c++/perf_analyzer/client_backend/client_backend.cc index 01585281b..92546d36d 100644 --- a/src/c++/perf_analyzer/client_backend/client_backend.cc +++ b/src/c++/perf_analyzer/client_backend/client_backend.cc @@ -90,6 +90,9 @@ BackendKindToString(const BackendKind kind) case TRITON_C_API: return std::string("TRITON_C_API"); break; + case OPENAI: + return std::string("OPENAI"); + break; default: return std::string("UNKNOWN"); break; diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index a74df4e25..42f9044c7 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -1939,6 +1939,9 @@ CLParser::VerifyOptions() "Must supply --endpoint for OpenAI service kind. 
For example, " "\"v1/chat/completions\"."); } + if (!params_->async) { + Usage("Only async mode is currently supported for OpenAI service-kind"); + } } if (params_->should_collect_metrics && From 7827fee88959a1f06f1699357c59e52edef967d5 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 6 Mar 2024 08:59:29 -0600 Subject: [PATCH 20/23] remove unused typedef --- src/c++/perf_analyzer/client_backend/openai/http_client.h | 4 ---- 1 file changed, 4 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.h b/src/c++/perf_analyzer/client_backend/openai/http_client.h index 13a0d2e05..3c311569e 100644 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.h @@ -38,10 +38,6 @@ namespace triton { namespace perfanalyzer { namespace clientbackend { namespace openai { -/// The key-value map type to be included in the request -/// as custom headers. -typedef std::map Headers; - // The options for authorizing and authenticating SSL/TLS connections. struct HttpSslOptions { enum CERTTYPE { CERT_PEM = 0, CERT_DER = 1 }; From 7df09ef48ea9aae128c4a625cf62144acac9a5f6 Mon Sep 17 00:00:00 2001 From: tgerdes Date: Wed, 6 Mar 2024 15:27:24 -0600 Subject: [PATCH 21/23] Add batch size assert --- src/c++/perf_analyzer/command_line_parser.cc | 3 +++ 1 file changed, 3 insertions(+) diff --git a/src/c++/perf_analyzer/command_line_parser.cc b/src/c++/perf_analyzer/command_line_parser.cc index 42f9044c7..9c8ebacac 100644 --- a/src/c++/perf_analyzer/command_line_parser.cc +++ b/src/c++/perf_analyzer/command_line_parser.cc @@ -1942,6 +1942,9 @@ CLParser::VerifyOptions() if (!params_->async) { Usage("Only async mode is currently supported for OpenAI service-kind"); } + if (params_->batch_size != 1) { + Usage("Batching is not currently supported with OpenAI service-kind"); + } } if (params_->should_collect_metrics && From 605217633fcb4736e62876eb46ccff688a28370d Mon Sep 17 00:00:00 2001 From: GuanLuo <41310872+GuanLuo@users.noreply.github.com> Date: Wed, 6 Mar 2024 13:33:39 -0800 Subject: [PATCH 22/23] Address comment (#487) * Address comment * Update src/c++/perf_analyzer/client_backend/openai/openai_client.cc * Update src/c++/perf_analyzer/client_backend/openai/http_client.cc * formatting --------- Co-authored-by: Timothy Gerdes <50968584+tgerdesnv@users.noreply.github.com> Co-authored-by: tgerdes --- .../client_backend/openai/http_client.cc | 27 ++++++++++++------- .../client_backend/openai/http_client.h | 1 + .../client_backend/openai/openai_client.cc | 18 ++++++++++--- 3 files changed, 34 insertions(+), 12 deletions(-) diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.cc b/src/c++/perf_analyzer/client_backend/openai/http_client.cc index ff636388b..08e4b4b3c 100644 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.cc @@ -76,19 +76,25 @@ HttpRequest::GetNextInput(uint8_t* buf, size_t size, size_t* input_bytes) } } +std::mutex HttpClient::curl_init_mtx_{}; HttpClient::HttpClient( const std::string& server_url, bool verbose, const HttpSslOptions& ssl_options) : url_(server_url), verbose_(verbose), ssl_options_(ssl_options) { - auto* ver = curl_version_info(CURLVERSION_NOW); - if (ver->features & CURL_VERSION_THREADSAFE == 0) { - throw std::runtime_error( - "HTTP client has dependency on CURL library to have thread-safe " - "support (CURL_VERSION_THREADSAFE set)"); - } - if (curl_global_init(CURL_GLOBAL_ALL) != 0) { - throw 
std::runtime_error("CURL global initialization failed"); + // [TODO TMA-1670] uncomment below and remove class-wise mutex once confirm + // curl >= 7.84.0 will always be used + // auto* ver = curl_version_info(CURLVERSION_NOW); + // if (ver->features & CURL_VERSION_THREADSAFE == 0) { + // throw std::runtime_error( + // "HTTP client has dependency on CURL library to have thread-safe " + // "support (CURL_VERSION_THREADSAFE set)"); + // } + { + std::lock_guard lk(curl_init_mtx_); + if (curl_global_init(CURL_GLOBAL_ALL) != 0) { + throw std::runtime_error("CURL global initialization failed"); + } } multi_handle_ = curl_multi_init(); @@ -114,7 +120,10 @@ HttpClient::~HttpClient() } curl_multi_cleanup(multi_handle_); - curl_global_cleanup(); + { + std::lock_guard lk(curl_init_mtx_); + curl_global_cleanup(); + } } const std::string& diff --git a/src/c++/perf_analyzer/client_backend/openai/http_client.h b/src/c++/perf_analyzer/client_backend/openai/http_client.h index 3c311569e..6b78d836e 100644 --- a/src/c++/perf_analyzer/client_backend/openai/http_client.h +++ b/src/c++/perf_analyzer/client_backend/openai/http_client.h @@ -168,5 +168,6 @@ class HttpClient { private: const std::string& ParseSslKeyType(HttpSslOptions::KEYTYPE key_type); const std::string& ParseSslCertType(HttpSslOptions::CERTTYPE cert_type); + static std::mutex curl_init_mtx_; }; }}}} // namespace triton::perfanalyzer::clientbackend::openai diff --git a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc index 362278436..28e55f3c0 100644 --- a/src/c++/perf_analyzer/client_backend/openai/openai_client.cc +++ b/src/c++/perf_analyzer/client_backend/openai/openai_client.cc @@ -114,8 +114,21 @@ size_t ChatCompletionClient::ResponseHandler( void* contents, size_t size, size_t nmemb, void* userp) { - // [WIP] verify if the SSE responses received are complete, or the response - // need to be stitched first + // [TODO TMA-1666] verify if the SSE responses received are complete, or the + // response need to be stitched first. To verify, print out the received + // responses from SendResponse() to make sure the OpenAI server doesn't chunk + // the HTTP responses in the way that misaligns with the SSE responses. Reason + // of not stitching responses now is that it is a bit complicated that to make + // the write callback bulletproof is to assume the response can be chunked at + // arbitrary position, then bake in checking for SSE style (data:.*\n\n) by + // iterating all received buffer character by character. + size_t result_bytes = size * nmemb; + // return early if the response is empty as the response handling is + // triggered by the content of the response. 
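// Illustrative sketch, not part of the patch: in streaming mode the OpenAI
// endpoint replies with Server-Sent Events ("data: {...}\n\n") and signals
// the end of the stream with a final "data: [DONE]" event, which is the
// sentinel the response handling keys off of. The helper name below is
// hypothetical.
#include <string>

bool IsSseStreamComplete(const std::string& response_buffer)
{
  // A terminal "[DONE]" data event marks the last SSE chunk of the response.
  return response_buffer.find("data: [DONE]") != std::string::npos;
}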
+ if (result_bytes == 0) { + return result_bytes; + } + auto request = reinterpret_cast(userp); if (request->timer_.Timestamp( triton::client::RequestTimers::Kind::RECV_START) == 0) { @@ -124,7 +137,6 @@ ChatCompletionClient::ResponseHandler( } char* buf = reinterpret_cast(contents); - size_t result_bytes = size * nmemb; request->response_buffer_.append(buf, result_bytes); // Send response now if streaming, otherwise wait until request has been // completed From 46a03db6eef16b47362ee620b993e05367b6cca8 Mon Sep 17 00:00:00 2001 From: Timothy Gerdes <50968584+tgerdesnv@users.noreply.github.com> Date: Wed, 6 Mar 2024 16:09:42 -0600 Subject: [PATCH 23/23] Make copy of exported data so it isn't corrupted (#488) --- src/c++/perf_analyzer/infer_context.cc | 2 +- .../perf_analyzer/profile_data_exporter.cc | 4 +-- src/c++/perf_analyzer/request_record.h | 30 +++++++++++++++---- .../test_profile_data_collector.cc | 13 +++++--- 4 files changed, 37 insertions(+), 12 deletions(-) diff --git a/src/c++/perf_analyzer/infer_context.cc b/src/c++/perf_analyzer/infer_context.cc index 4e998428b..6da86fef3 100644 --- a/src/c++/perf_analyzer/infer_context.cc +++ b/src/c++/perf_analyzer/infer_context.cc @@ -188,7 +188,7 @@ InferContext::GetOutput(const cb::InferResult& infer_result) const uint8_t* buf{nullptr}; size_t byte_size{0}; infer_result.RawData(requested_output->Name(), &buf, &byte_size); - output[requested_output->Name()] = {buf, byte_size}; + output.emplace(requested_output->Name(), ResponseData(buf, byte_size)); } return output; } diff --git a/src/c++/perf_analyzer/profile_data_exporter.cc b/src/c++/perf_analyzer/profile_data_exporter.cc index 3bcd6f83e..d840c460d 100644 --- a/src/c++/perf_analyzer/profile_data_exporter.cc +++ b/src/c++/perf_analyzer/profile_data_exporter.cc @@ -160,8 +160,8 @@ ProfileDataExporter::AddResponseOutputs( rapidjson::Value response_output_json(rapidjson::kObjectType); for (const auto& output : response_output) { const auto& name{output.first}; - const auto& buf{output.second.first}; - const auto& byte_size{output.second.second}; + const auto& buf{output.second.data_.get()}; + const auto& byte_size{output.second.size_}; rapidjson::Value name_json(name.c_str(), document_.GetAllocator()); rapidjson::Value output_json{}; if (buf != nullptr) { diff --git a/src/c++/perf_analyzer/request_record.h b/src/c++/perf_analyzer/request_record.h index fd6252a57..b4d122eb7 100644 --- a/src/c++/perf_analyzer/request_record.h +++ b/src/c++/perf_analyzer/request_record.h @@ -33,20 +33,40 @@ namespace triton { namespace perfanalyzer { +/// A record containing the data of a single response +struct ResponseData { + ResponseData(const uint8_t* buf, size_t size) + { + uint8_t* array = new uint8_t[size]; + std::memcpy(array, buf, size); + data_ = std::shared_ptr(array, [](uint8_t* p) { delete[] p; }); + size_ = size; + } + + // Define equality comparison operator so it can be inserted into maps + bool operator==(const ResponseData& other) const + { + if (size_ != other.size_) + return false; + // Compare the contents of the arrays + return std::memcmp(data_.get(), other.data_.get(), size_) == 0; + } + + std::shared_ptr data_; + size_t size_; +}; + /// A record of an individual request struct RequestRecord { - using ResponseOutput = - std::unordered_map>; + using ResponseOutput = std::unordered_map; RequestRecord( std::chrono::time_point start_time = std::chrono::time_point(), std::vector> response_timestamps = {}, - std::vector< - std::unordered_map>> - response_outputs = {}, + std::vector 
response_outputs = {}, bool sequence_end = true, bool delayed = false, uint64_t sequence_id = 0, bool has_null_last_response = false) : start_time_(start_time), response_timestamps_(response_timestamps), diff --git a/src/c++/perf_analyzer/test_profile_data_collector.cc b/src/c++/perf_analyzer/test_profile_data_collector.cc index b6ce7ffab..dfed394ac 100644 --- a/src/c++/perf_analyzer/test_profile_data_collector.cc +++ b/src/c++/perf_analyzer/test_profile_data_collector.cc @@ -63,10 +63,13 @@ TEST_CASE("profile_data_collector: AddData") auto request1_timestamp{clock_epoch + std::chrono::nanoseconds(1)}; auto request1_response1_timestamp{clock_epoch + std::chrono::nanoseconds(2)}; auto request1_response2_timestamp{clock_epoch + std::chrono::nanoseconds(3)}; + uint8_t fake_data[] = {0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07, 0x08}; RequestRecord::ResponseOutput request1_response1_output{ - {"key1", {nullptr, 1}}, {"key2", {nullptr, 2}}}; + {"key1", ResponseData(fake_data, 1)}, + {"key2", ResponseData(fake_data, 2)}}; RequestRecord::ResponseOutput request1_response2_output{ - {"key3", {nullptr, 3}}, {"key4", {nullptr, 4}}}; + {"key3", ResponseData(fake_data, 3)}, + {"key4", ResponseData(fake_data, 4)}}; RequestRecord request_record1{ request1_timestamp, @@ -83,9 +86,11 @@ TEST_CASE("profile_data_collector: AddData") auto request2_response1_timestamp{clock_epoch + std::chrono::nanoseconds(5)}; auto request2_response2_timestamp{clock_epoch + std::chrono::nanoseconds(6)}; RequestRecord::ResponseOutput request2_response1_output{ - {"key5", {nullptr, 5}}, {"key6", {nullptr, 6}}}; + {"key5", ResponseData(fake_data, 5)}, + {"key6", ResponseData(fake_data, 6)}}; RequestRecord::ResponseOutput request2_response2_output{ - {"key7", {nullptr, 7}}, {"key8", {nullptr, 8}}}; + {"key7", ResponseData(fake_data, 7)}, + {"key8", ResponseData(fake_data, 8)}}; RequestRecord request_record2{ request2_timestamp,