New JSON datatype for PA. Show json data available at http_client level
tgerdesnv committed Feb 28, 2024
1 parent 6257def commit 5421506
Showing 8 changed files with 292 additions and 12 deletions.
5 changes: 1 addition & 4 deletions src/c++/perf_analyzer/client_backend/client_backend.cc
@@ -435,10 +435,7 @@ InferInput::Create(
 #ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
   else if (kind == OPENAI) {
     RETURN_IF_CB_ERROR(
-        // FIXME TODO TKG
-        // openai::OpenAiInferInput::Create(infer_input, name, dims, datatype));
-        tritonremote::TritonInferInput::Create(
-            infer_input, name, dims, datatype));
+        openai::OpenAiInferInput::Create(infer_input, name, dims, datatype));
   }
 #endif  // TRITON_ENABLE_PERF_ANALYZER_OPENAI
 #ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
2 changes: 2 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt
@@ -30,12 +30,14 @@ set(
   OPENAI_CLIENT_BACKEND_SRCS
   openai_client_backend.cc
   openai_http_client.cc
+  openai_infer_input.cc
 )
 
 set(
   OPENAI_CLIENT_BACKEND_HDRS
   openai_client_backend.h
   openai_http_client.h
+  openai_infer_input.h
 )

add_library(
src/c++/perf_analyzer/client_backend/openai/openai_client_backend.cc
@@ -42,11 +42,10 @@ OpenAiClientBackend::Create(
         "perf_analyzer does not support gRPC protocol with OpenAI endpoints");
   }
   std::unique_ptr<OpenAiClientBackend> openai_client_backend(
-      new OpenAiClientBackend(http_headers));
+      new OpenAiClientBackend(http_headers));
 
-  // TODO: Adjust as needed
-  RETURN_IF_CB_ERROR(HttpClient::Create(
-      &(openai_client_backend->http_client_), url, verbose));
+  RETURN_IF_CB_ERROR(
+      HttpClient::Create(&(openai_client_backend->http_client_), url, verbose));
 
   *client_backend = std::move(openai_client_backend);
@@ -64,8 +63,8 @@ OpenAiClientBackend::AsyncInfer(
     callback(result);
   };
 
-  // TODO: make an async infer call
-  //RETURN_IF_CB_ERROR(http_client_->AsyncInfer(...));
+  RETURN_IF_CB_ERROR(http_client_->AsyncInfer(
+      wrapped_callback, options, inputs, outputs, *http_headers_));
 
   return Error::Success;
 }
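
Note that the hunk above hands http_client_->AsyncInfer a wrapped_callback rather than the caller's callback directly, which keeps a hook where the backend can post-process a result before forwarding it. A minimal standalone sketch of that interposition pattern (the types here are illustrative stand-ins, not the perf_analyzer classes):

#include <functional>
#include <iostream>

struct InferResult {  // stand-in for the backend's InferResult
  int status;
};
using OnCompleteFn = std::function<void(InferResult*)>;

// stand-in for HttpClient::AsyncInfer: invokes the callback when done
void AsyncInferLowLevel(OnCompleteFn cb) {
  InferResult result{0};
  cb(&result);
}

int main() {
  OnCompleteFn user_cb = [](InferResult* r) {
    std::cout << "status=" << r->status << std::endl;
  };
  // the wrapper is where the backend could translate or annotate the
  // result before the user's callback ever sees it
  OnCompleteFn wrapped_callback = [user_cb](InferResult* r) { user_cb(r); };
  AsyncInferLowLevel(wrapped_callback);
  return 0;
}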
@@ -112,8 +111,7 @@ OpenAiInferRequestedOutput::Create(
   return Error::Success;
 }
 
-OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(
-    const std::string& name)
+OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(const std::string& name)
     : InferRequestedOutput(BackendKind::OPENAI, name)
 {
 }
60 changes: 60 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc
@@ -26,6 +26,8 @@
 
 #include "openai_http_client.h"
 
+#include <rapidjson/document.h>
+
 
 namespace triton { namespace perfanalyzer { namespace clientbackend {
 namespace openai {
@@ -40,6 +42,64 @@ HttpClient::Create(
return Error::Success;
}

Error
HttpClient::AsyncInfer(
OpenAiOnCompleteFn callback, const InferOptions& options,
const std::vector<InferInput*>& inputs,
const std::vector<const InferRequestedOutput*>& outputs,
const Headers& headers)
{
// TODO FIXME implement

// TODO FIXME cleanup or remove this. It just proves the json data arrives
rapidjson::Document d{};

if (inputs.size() != 1) {
return Error("Only expecting one input");
}

auto raw_input = dynamic_cast<OpenAiInferInput*>(inputs[0]);

raw_input->PrepareForRequest();
bool end_of_input = false;
const uint8_t* buf;
size_t buf_size;
raw_input->GetNext(&buf, &buf_size, &end_of_input);
if (!end_of_input) {
return Error("Unexpected multiple json data inputs");
}
if (buf == nullptr) {
return Error("Unexpected null json data");
}

std::string json_str(reinterpret_cast<const char*>(buf), buf_size);
std::cout << "FIXME TODO: JSON data string is " << json_str << std::endl;


if (d.Parse(json_str.c_str()).HasParseError()) {
return Error("Unable to parse json string: " + json_str);
}

// FIXME TKG -- where/how would the 'streaming' option get plugged in?

// FIXME TKG -- GOOD GOD! Is it this hard to add a single value into a json
// object??
// FIXME TKG -- what if the user supplied this in the input json file?
d.AddMember(
"model",
rapidjson::Value().SetString(
options.model_name_.c_str(),
static_cast<rapidjson::SizeType>(options.model_name_.length()),
d.GetAllocator()),
d.GetAllocator());

for (auto itr = d.MemberBegin(); itr != d.MemberEnd(); ++itr) {
std::cout << "FIXME TODO: valid JSON object has key "
<< itr->name.GetString() << std::endl;
}

return Error::Success;
}

HttpClient::HttpClient(const std::string& url, bool verbose)
: InferenceServerClient(verbose), url_(url)
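
The FIXMEs in AsyncInfer above ask whether rapidjson member insertion has to be this verbose and what should happen if the user's input JSON already supplies a "model" key. A minimal sketch of one answer, using a hypothetical SetStringMember helper (not part of this commit) that overwrites an existing member instead of blindly calling AddMember; a "stream" flag for the streaming option could be injected the same way:

#include <rapidjson/document.h>

#include <iostream>
#include <string>

// Hypothetical helper: set a string-valued member, replacing any value the
// user already supplied in the input JSON (e.g. their own "model" key).
static void
SetStringMember(
    rapidjson::Document& d, const char* key, const std::string& value)
{
  rapidjson::Value v(
      value.c_str(), static_cast<rapidjson::SizeType>(value.size()),
      d.GetAllocator());
  auto itr = d.FindMember(key);
  if (itr != d.MemberEnd()) {
    itr->value = v;  // key already present: overwrite in place
  } else {
    d.AddMember(rapidjson::Value(key, d.GetAllocator()), v, d.GetAllocator());
  }
}

int
main()
{
  rapidjson::Document d;
  d.Parse(R"({"prompt": "hello", "model": "user-model"})");
  SetStringMember(d, "model", "my_model");  // replaces "user-model"
  std::cout << d["model"].GetString() << std::endl;  // prints my_model
  return 0;
}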
12 changes: 12 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_http_client.h
@@ -27,6 +27,7 @@
 
 #include "../client_backend.h"
 #include "common.h"
+#include "openai_infer_input.h"
 
 
 namespace tc = triton::client;
@@ -37,6 +38,8 @@ namespace openai {
 class InferResult;
 class HttpInferRequest;
 
+using OpenAiOnCompleteFn = std::function<void(InferResult*)>;
+
 //==============================================================================
 /// An HttpClient object is used to perform any kind of communication with the
 /// OpenAi service using <TODO: FILL IN>
@@ -63,6 +66,15 @@ class HttpClient : public tc::InferenceServerClient {
       std::unique_ptr<HttpClient>* client, const std::string& server_url,
       const bool verbose);
 
+  /// TODO FIXME: Update
+  /// Run asynchronous inference on server.
+  Error AsyncInfer(
+      OpenAiOnCompleteFn callback, const InferOptions& options,
+      const std::vector<InferInput*>& inputs,
+      const std::vector<const InferRequestedOutput*>& outputs =
+          std::vector<const InferRequestedOutput*>(),
+      const Headers& headers = Headers());
+
  private:
   HttpClient(const std::string& url, bool verbose);

Expand Down
112 changes: 112 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc
@@ -0,0 +1,112 @@
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "openai_infer_input.h"

namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {

Error
OpenAiInferInput::Create(
InferInput** infer_input, const std::string& name,
const std::vector<int64_t>& dims, const std::string& datatype)
{
OpenAiInferInput* local_infer_input =
new OpenAiInferInput(name, dims, datatype);

*infer_input = local_infer_input;
return Error::Success;
}

Error
OpenAiInferInput::SetShape(const std::vector<int64_t>& shape)
{
shape_ = shape;
return Error::Success;
}

Error
OpenAiInferInput::Reset()
{
bufs_.clear();
buf_byte_sizes_.clear();
bufs_idx_ = 0;
byte_size_ = 0;
return Error::Success;
}

Error
OpenAiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size)
{
byte_size_ += input_byte_size;

bufs_.push_back(input);
buf_byte_sizes_.push_back(input_byte_size);

return Error::Success;
}

Error
OpenAiInferInput::ByteSize(size_t* byte_size) const
{
*byte_size = byte_size_;
return Error::Success;
}

Error
OpenAiInferInput::PrepareForRequest()
{
// Reset position so request sends entire input.
bufs_idx_ = 0;
buf_pos_ = 0;
return Error::Success;
}

Error
OpenAiInferInput::GetNext(
const uint8_t** buf, size_t* input_bytes, bool* end_of_input)
{
if (bufs_idx_ < bufs_.size()) {
*buf = bufs_[bufs_idx_];
*input_bytes = buf_byte_sizes_[bufs_idx_];
bufs_idx_++;
} else {
*buf = nullptr;
*input_bytes = 0;
}
*end_of_input = (bufs_idx_ >= bufs_.size());

return Error::Success;
}

OpenAiInferInput::OpenAiInferInput(
const std::string& name, const std::vector<int64_t>& dims,
const std::string& datatype)
    : InferInput(BackendKind::OPENAI, name, datatype), shape_(dims)
{
}

}}}} // namespace triton::perfanalyzer::clientbackend::openai
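
For reference, the buffer-iteration contract that HttpClient::AsyncInfer leans on above — GetNext() returns chunks in order and flips end_of_input once the last one is handed out, so a single AppendRaw'd JSON payload satisfies the end_of_input check on the first call — can be sketched standalone (ChunkedInput is an illustrative stand-in, not the real class):

#include <cassert>
#include <cstdint>
#include <cstring>
#include <vector>

struct ChunkedInput {  // stand-in for OpenAiInferInput's buffer bookkeeping
  std::vector<const uint8_t*> bufs;
  std::vector<size_t> sizes;
  size_t idx{0};

  void GetNext(const uint8_t** buf, size_t* len, bool* end_of_input) {
    if (idx < bufs.size()) {
      *buf = bufs[idx];
      *len = sizes[idx];
      idx++;
    } else {
      *buf = nullptr;
      *len = 0;
    }
    *end_of_input = (idx >= bufs.size());
  }
};

int main() {
  const char* json = R"({"prompt": "hello"})";
  ChunkedInput input;
  input.bufs.push_back(reinterpret_cast<const uint8_t*>(json));
  input.sizes.push_back(std::strlen(json));

  const uint8_t* buf = nullptr;
  size_t len = 0;
  bool end = false;
  input.GetNext(&buf, &len, &end);
  assert(buf != nullptr && len == std::strlen(json));
  assert(end);  // one buffer only, so the first call is also the last
  return 0;
}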
76 changes: 76 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h
@@ -0,0 +1,76 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <string>

#include "../../perf_utils.h"
#include "../client_backend.h"


namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {

//==============================================================
/// OpenAiInferInput instance holds the information regarding
/// model input tensors and their corresponding generated data.
///
class OpenAiInferInput : public InferInput {
public:
static Error Create(
InferInput** infer_input, const std::string& name,
const std::vector<int64_t>& dims, const std::string& datatype);
/// See InferInput::Shape()
const std::vector<int64_t>& Shape() const override { return shape_; }
/// See InferInput::SetShape()
Error SetShape(const std::vector<int64_t>& shape) override;
/// See InferInput::Reset()
Error Reset() override;
/// See InferInput::AppendRaw()
Error AppendRaw(const uint8_t* input, size_t input_byte_size) override;
/// Gets the size of data added into this input in bytes.
/// \param byte_size The size of data added in bytes.
/// \return Error object indicating success or failure.
Error ByteSize(size_t* byte_size) const;
/// Resets the heads to start providing data from the beginning.
Error PrepareForRequest();
/// Get the next chunk of data if available.
Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input);

private:
explicit OpenAiInferInput(
const std::string& name, const std::vector<int64_t>& dims,
const std::string& datatype);

std::vector<int64_t> shape_;
size_t byte_size_{0};

  size_t bufs_idx_{0}, buf_pos_{0};
std::vector<const uint8_t*> bufs_;
std::vector<size_t> buf_byte_sizes_;
};

}}}} // namespace triton::perfanalyzer::clientbackend::openai
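
Putting the pieces together, a hedged sketch of how a caller might exercise the new path end to end. Only the OpenAiInferInput::Create, AppendRaw, HttpClient::Create, and AsyncInfer signatures come from this diff; the URL, input name, "JSON" datatype string, and the InferOptions constructor shape are assumptions for illustration:

#include <memory>
#include <string>

#include "openai_http_client.h"
#include "openai_infer_input.h"

namespace cb = triton::perfanalyzer::clientbackend;

cb::Error
SendOneRequest()
{
  // hypothetical JSON payload; perf_analyzer would normally read this
  // from the user's input data file
  const std::string json = R"({"prompt": "hello", "max_tokens": 16})";

  cb::InferInput* input = nullptr;
  RETURN_IF_CB_ERROR(
      cb::openai::OpenAiInferInput::Create(&input, "payload", {1}, "JSON"));
  RETURN_IF_CB_ERROR(input->AppendRaw(
      reinterpret_cast<const uint8_t*>(json.data()), json.size()));

  std::unique_ptr<cb::openai::HttpClient> client;
  RETURN_IF_CB_ERROR(cb::openai::HttpClient::Create(
      &client, "http://localhost:9000", /*verbose=*/true));

  cb::InferOptions options("my_model");  // assumed ctor; populates model_name_
  auto on_complete = [](cb::openai::InferResult* /*result*/) {};
  return client->AsyncInfer(on_complete, options, {input});
}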
