Skip to content

Commit

Permalink
New JSON datatype for PA. Show json data available at http_client level
Browse files Browse the repository at this point in the history
  • Loading branch information
tgerdesnv committed Feb 28, 2024
1 parent 6257def commit fb045d5
Show file tree
Hide file tree
Showing 8 changed files with 291 additions and 12 deletions.
5 changes: 1 addition & 4 deletions src/c++/perf_analyzer/client_backend/client_backend.cc
Original file line number Diff line number Diff line change
Expand Up @@ -435,10 +435,7 @@ InferInput::Create(
#ifdef TRITON_ENABLE_PERF_ANALYZER_OPENAI
else if (kind == OPENAI) {
RETURN_IF_CB_ERROR(
// FIXME TODO TKG
// openai::OpenAiInferInput::Create(infer_input, name, dims, datatype));
tritonremote::TritonInferInput::Create(
infer_input, name, dims, datatype));
openai::OpenAiInferInput::Create(infer_input, name, dims, datatype));
}
#endif // TRITON_ENABLE_PERF_ANALYZER_OPENAI
#ifdef TRITON_ENABLE_PERF_ANALYZER_TFS
Expand Down
2 changes: 2 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -30,12 +30,14 @@ set(
OPENAI_CLIENT_BACKEND_SRCS
openai_client_backend.cc
openai_http_client.cc
openai_infer_input.cc
)

set(
OPENAI_CLIENT_BACKEND_HDRS
openai_client_backend.h
openai_http_client.h
openai_infer_input.h
)

add_library(
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -42,11 +42,10 @@ OpenAiClientBackend::Create(
"perf_analyzer does not support gRPC protocol with OpenAI endpoints");
}
std::unique_ptr<OpenAiClientBackend> openai_client_backend(
new OpenAiClientBackend(http_headers));
new OpenAiClientBackend(http_headers));

// TODO: Adjust as needed
RETURN_IF_CB_ERROR(HttpClient::Create(
&(openai_client_backend->http_client_), url, verbose));
RETURN_IF_CB_ERROR(
HttpClient::Create(&(openai_client_backend->http_client_), url, verbose));

*client_backend = std::move(openai_client_backend);

Expand All @@ -64,8 +63,8 @@ OpenAiClientBackend::AsyncInfer(
callback(result);
};

// TODO: make an async infer call
//RETURN_IF_CB_ERROR(http_client_->AsyncInfer(...));
RETURN_IF_CB_ERROR(http_client_->AsyncInfer(
wrapped_callback, options, inputs, outputs, *http_headers_));

return Error::Success;
}
Expand Down Expand Up @@ -112,8 +111,7 @@ OpenAiInferRequestedOutput::Create(
return Error::Success;
}

OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(
const std::string& name)
OpenAiInferRequestedOutput::OpenAiInferRequestedOutput(const std::string& name)
: InferRequestedOutput(BackendKind::OPENAI, name)
{
}
Expand Down
59 changes: 59 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_http_client.cc
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,8 @@

#include "openai_http_client.h"

#include <rapidjson/document.h>
#include <rapidjson/rapidjson.h>

#include <iostream>
#include <string>


namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {
Expand All @@ -40,6 +42,63 @@ HttpClient::Create(
return Error::Success;
}

/// Placeholder implementation of the asynchronous inference entry point.
///
/// At present this only proves that the JSON payload reaches the HTTP client:
/// it pulls the single buffer out of the one expected input, parses it as
/// JSON, adds a "model" member taken from \p options and prints the resulting
/// keys. No request is sent and \p callback is not invoked here.
///
/// \param callback Completion callback (currently unused).
/// \param options Inference options; only model_name_ is read.
/// \param inputs Must contain exactly one OpenAiInferInput holding a single
/// buffer with the JSON text.
/// \param outputs Requested outputs (currently unused).
/// \param headers Extra HTTP headers (currently unused).
/// \return Error::Success when the payload is a well-formed JSON object,
/// otherwise an Error describing what was wrong with the input.
Error
HttpClient::AsyncInfer(
    OpenAiOnCompleteFn callback, const InferOptions& options,
    const std::vector<InferInput*>& inputs,
    const std::vector<const InferRequestedOutput*>& outputs,
    const Headers& headers)
{
  // TODO FIXME implement

  // TODO FIXME cleanup or remove this. It just proves the json data arrives
  if (inputs.size() != 1) {
    return Error("Only expecting one input");
  }

  // dynamic_cast yields nullptr for a null pointer or for an input created by
  // a different backend; reject it instead of dereferencing it.
  auto raw_input = dynamic_cast<OpenAiInferInput*>(inputs[0]);
  if (raw_input == nullptr) {
    return Error("Expected an OpenAiInferInput as the request input");
  }

  RETURN_IF_CB_ERROR(raw_input->PrepareForRequest());
  bool end_of_input = false;
  const uint8_t* buf = nullptr;
  size_t buf_size = 0;
  RETURN_IF_CB_ERROR(raw_input->GetNext(&buf, &buf_size, &end_of_input));
  if (!end_of_input) {
    return Error("Unexpected multiple json data inputs");
  }
  if (buf == nullptr) {
    return Error("Unexpected null json data");
  }

  std::string json_str(reinterpret_cast<const char*>(buf), buf_size);
  std::cout << "FIXME TODO: JSON data string is " << json_str << std::endl;

  rapidjson::Document d{};
  if (d.Parse(json_str.c_str()).HasParseError()) {
    return Error("Unable to parse json string: " + json_str);
  }
  // AddMember() is only valid on a JSON object; a payload such as "[]" or
  // "42" parses successfully but must not reach the call below.
  if (!d.IsObject()) {
    return Error("Expected json data to be a json object: " + json_str);
  }

  // FIXME TKG -- where/how would the 'streaming' option get plugged in?

  // The model name is copied into the document's allocator so the JSON value
  // does not depend on the lifetime of 'options'.
  // NOTE(review): if the payload already carries a "model" key this appends a
  // second one -- confirm which value should win.
  d.AddMember(
      "model",
      rapidjson::Value().SetString(
          options.model_name_.c_str(),
          static_cast<rapidjson::SizeType>(options.model_name_.length()),
          d.GetAllocator()),
      d.GetAllocator());

  for (auto itr = d.MemberBegin(); itr != d.MemberEnd(); ++itr) {
    std::cout << "FIXME TODO: valid JSON object has key "
              << itr->name.GetString() << std::endl;
  }

  return Error::Success;
}

HttpClient::HttpClient(const std::string& url, bool verbose)
: InferenceServerClient(verbose), url_(url)
Expand Down
12 changes: 12 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_http_client.h
Original file line number Diff line number Diff line change
Expand Up @@ -27,6 +27,7 @@

#include "../client_backend.h"
#include "common.h"
#include "openai_infer_input.h"


namespace tc = triton::client;
Expand All @@ -37,6 +38,8 @@ namespace openai {
class InferResult;
class HttpInferRequest;

/// Signature of the completion callback accepted by HttpClient::AsyncInfer():
/// a callable that receives a pointer to the openai::InferResult.
using OpenAiOnCompleteFn = std::function<void(InferResult*)>;

//==============================================================================
/// An HttpClient object is used to perform any kind of communication with the
/// OpenAi service using <TODO: FILL IN>
Expand All @@ -63,6 +66,15 @@ class HttpClient : public tc::InferenceServerClient {
std::unique_ptr<HttpClient>* client, const std::string& server_url,
const bool verbose);

/// TODO FIXME: Update
/// Run asynchronous inference on server.
/// \param callback Completion callback of type OpenAiOnCompleteFn, i.e. a
/// callable taking an InferResult*.
/// \param options Options for the inference request.
/// \param inputs Input objects for the request.
/// \param outputs Optional requested outputs; defaults to an empty vector.
/// \param headers Optional headers for the request; defaults to an empty
/// Headers object.
/// \return Error object indicating success or failure.
Error AsyncInfer(
OpenAiOnCompleteFn callback, const InferOptions& options,
const std::vector<InferInput*>& inputs,
const std::vector<const InferRequestedOutput*>& outputs =
std::vector<const InferRequestedOutput*>(),
const Headers& headers = Headers());

private:
HttpClient(const std::string& url, bool verbose);

Expand Down
112 changes: 112 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc
Original file line number Diff line number Diff line change
@@ -0,0 +1,112 @@
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

#include "openai_infer_input.h"

namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {

/// Factory for OpenAiInferInput objects.
/// \param infer_input Receives the pointer to the newly allocated input.
/// \param name Name forwarded to the constructor.
/// \param dims Initial shape forwarded to the constructor.
/// \param datatype Datatype string forwarded to the constructor.
/// \return Always Error::Success.
Error
OpenAiInferInput::Create(
    InferInput** infer_input, const std::string& name,
    const std::vector<int64_t>& dims, const std::string& datatype)
{
  // The constructor is private, so this is the only way to build an instance.
  // The raw pointer is handed out through the out-parameter; nothing else
  // keeps a reference to the allocation.
  *infer_input = new OpenAiInferInput(name, dims, datatype);
  return Error::Success;
}

/// Replaces the stored shape with a copy of \p shape.
/// \param shape The new dimensions.
/// \return Always Error::Success.
Error
OpenAiInferInput::SetShape(const std::vector<int64_t>& shape)
{
  // Plain copy-assignment; no validation of the dimensions is performed.
  this->shape_ = shape;

  return Error::Success;
}

/// Drops every buffer registered through AppendRaw() and returns the input to
/// its empty state.
/// \return Always Error::Success.
Error
OpenAiInferInput::Reset()
{
  bufs_.clear();
  buf_byte_sizes_.clear();
  // Rewind both read cursors, matching PrepareForRequest(), so neither is
  // left with a stale or never-assigned value.
  bufs_idx_ = 0;
  buf_pos_ = 0;
  byte_size_ = 0;
  return Error::Success;
}

/// Registers one more chunk of raw data for this input.
/// Only the pointer and the length are recorded; the bytes are not copied.
/// \param input Pointer to the first byte of the chunk.
/// \param input_byte_size Length of the chunk in bytes.
/// \return Always Error::Success.
Error
OpenAiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size)
{
  // Keep the running total reported by ByteSize() up to date.
  byte_size_ += input_byte_size;

  // The two vectors are parallel: entry i of one describes entry i of the
  // other.
  bufs_.emplace_back(input);
  buf_byte_sizes_.emplace_back(input_byte_size);

  return Error::Success;
}

/// Reports the total number of bytes added since the last Reset().
/// \param byte_size Receives the accumulated size in bytes.
/// \return Always Error::Success.
Error
OpenAiInferInput::ByteSize(size_t* byte_size) const
{
  // byte_size_ is maintained incrementally by AppendRaw().
  *byte_size = this->byte_size_;

  return Error::Success;
}

/// Rewinds the read cursors so the next GetNext() call starts again at the
/// first registered buffer.
/// \return Always Error::Success.
Error
OpenAiInferInput::PrepareForRequest()
{
  // Both cursors go back to the start so a request sees the whole input.
  bufs_idx_ = buf_pos_ = 0;
  return Error::Success;
}

/// Hands out the next registered buffer, if any, and advances the cursor.
/// \param buf Receives the buffer pointer, or nullptr when none is left.
/// \param input_bytes Receives the buffer length, or 0 when none is left.
/// \param end_of_input Set to true when no further buffers remain after this
/// call.
/// \return Always Error::Success.
Error
OpenAiInferInput::GetNext(
    const uint8_t** buf, size_t* input_bytes, bool* end_of_input)
{
  const bool has_chunk = bufs_idx_ < bufs_.size();

  *buf = has_chunk ? bufs_[bufs_idx_] : nullptr;
  *input_bytes = has_chunk ? buf_byte_sizes_[bufs_idx_] : 0;
  if (has_chunk) {
    ++bufs_idx_;
  }

  // Evaluated after the advance, so the last chunk is returned together with
  // end_of_input == true.
  *end_of_input = (bufs_idx_ >= bufs_.size());

  return Error::Success;
}

/// Builds an input tagged with the OpenAI backend kind.
/// \param name Name passed to the InferInput base.
/// \param dims Initial shape of the input.
/// \param datatype Datatype string passed to the InferInput base.
OpenAiInferInput::OpenAiInferInput(
    const std::string& name, const std::vector<int64_t>& dims,
    const std::string& datatype)
    // The kind must be OPENAI (as in OpenAiInferRequestedOutput), not
    // TENSORFLOW_SERVING. The read cursors start at zero so GetNext() is
    // well-defined even before PrepareForRequest() or Reset() is called.
    : InferInput(BackendKind::OPENAI, name, datatype), shape_(dims),
      bufs_idx_(0), buf_pos_(0)
{
}

}}}} // namespace triton::perfanalyzer::clientbackend::openai
76 changes: 76 additions & 0 deletions src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,76 @@
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
//
// Redistribution and use in source and binary forms, with or without
// modification, are permitted provided that the following conditions
// are met:
// * Redistributions of source code must retain the above copyright
// notice, this list of conditions and the following disclaimer.
// * Redistributions in binary form must reproduce the above copyright
// notice, this list of conditions and the following disclaimer in the
// documentation and/or other materials provided with the distribution.
// * Neither the name of NVIDIA CORPORATION nor the names of its
// contributors may be used to endorse or promote products derived
// from this software without specific prior written permission.
//
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#pragma once

#include <string>

#include "../../perf_utils.h"
#include "../client_backend.h"


namespace triton { namespace perfanalyzer { namespace clientbackend {
namespace openai {

//==============================================================
/// OpenAiInferInput instance holds the information regarding
/// model input tensors and their corresponding generated data.
///
/// Data is registered as a sequence of raw buffers via AppendRaw(). Only the
/// pointers and sizes are stored, not copies of the bytes. The buffers are
/// read back in order with PrepareForRequest() followed by GetNext().
///
class OpenAiInferInput : public InferInput {
 public:
  /// Create an OpenAiInferInput.
  /// \param infer_input Receives the pointer to the newly created object.
  /// \param name Name of the input.
  /// \param dims Initial shape of the input.
  /// \param datatype Datatype string of the input.
  /// \return Error object indicating success or failure.
  static Error Create(
      InferInput** infer_input, const std::string& name,
      const std::vector<int64_t>& dims, const std::string& datatype);
  /// See InferInput::Shape()
  const std::vector<int64_t>& Shape() const override { return shape_; }
  /// See InferInput::SetShape()
  Error SetShape(const std::vector<int64_t>& shape) override;
  /// See InferInput::Reset()
  Error Reset() override;
  /// See InferInput::AppendRaw()
  Error AppendRaw(const uint8_t* input, size_t input_byte_size) override;
  /// Gets the size of data added into this input in bytes.
  /// \param byte_size The size of data added in bytes.
  /// \return Error object indicating success or failure.
  Error ByteSize(size_t* byte_size) const;
  /// Resets the heads to start providing data from the beginning.
  Error PrepareForRequest();
  /// Get the next chunk of data if available.
  /// \param buf Receives the pointer to the chunk.
  /// \param input_bytes Receives the size of the chunk in bytes.
  /// \param end_of_input Receives whether the input has been fully consumed.
  /// \return Error object indicating success or failure.
  Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input);

 private:
  explicit OpenAiInferInput(
      const std::string& name, const std::vector<int64_t>& dims,
      const std::string& datatype);

  std::vector<int64_t> shape_;
  size_t byte_size_{0};

  // Read cursors. Default-initialized so GetNext() never reads an
  // indeterminate index, even if PrepareForRequest() has not been called.
  size_t bufs_idx_{0};
  size_t buf_pos_{0};
  // Parallel vectors: bufs_[i] is buf_byte_sizes_[i] bytes long.
  std::vector<const uint8_t*> bufs_;
  std::vector<size_t> buf_byte_sizes_;
};

}}}} // namespace triton::perfanalyzer::clientbackend::openai
Loading

0 comments on commit fb045d5

Please sign in to comment.