-
Notifications
You must be signed in to change notification settings - Fork 234
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
New JSON datatype for PA. Show json data available at http_client level
- Loading branch information
Showing
8 changed files
with
292 additions
and
12 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
112 changes: 112 additions & 0 deletions
112
src/c++/perf_analyzer/client_backend/openai/openai_infer_input.cc
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,112 @@ | ||
// Copyright (c) 2024, NVIDIA CORPORATION. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions | ||
// are met: | ||
// * Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// * Redistributions in binary form must reproduce the above copyright | ||
// notice, this list of conditions and the following disclaimer in the | ||
// documentation and/or other materials provided with the distribution. | ||
// * Neither the name of NVIDIA CORPORATION nor the names of its | ||
// contributors may be used to endorse or promote products derived | ||
// from this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
|
||
#include "openai_infer_input.h" | ||
|
||
namespace triton { namespace perfanalyzer { namespace clientbackend { | ||
namespace openai { | ||
|
||
Error | ||
OpenAiInferInput::Create( | ||
InferInput** infer_input, const std::string& name, | ||
const std::vector<int64_t>& dims, const std::string& datatype) | ||
{ | ||
OpenAiInferInput* local_infer_input = | ||
new OpenAiInferInput(name, dims, datatype); | ||
|
||
*infer_input = local_infer_input; | ||
return Error::Success; | ||
} | ||
|
||
Error | ||
OpenAiInferInput::SetShape(const std::vector<int64_t>& shape) | ||
{ | ||
shape_ = shape; | ||
return Error::Success; | ||
} | ||
|
||
Error | ||
OpenAiInferInput::Reset() | ||
{ | ||
bufs_.clear(); | ||
buf_byte_sizes_.clear(); | ||
bufs_idx_ = 0; | ||
byte_size_ = 0; | ||
return Error::Success; | ||
} | ||
|
||
Error | ||
OpenAiInferInput::AppendRaw(const uint8_t* input, size_t input_byte_size) | ||
{ | ||
byte_size_ += input_byte_size; | ||
|
||
bufs_.push_back(input); | ||
buf_byte_sizes_.push_back(input_byte_size); | ||
|
||
return Error::Success; | ||
} | ||
|
||
Error | ||
OpenAiInferInput::ByteSize(size_t* byte_size) const | ||
{ | ||
*byte_size = byte_size_; | ||
return Error::Success; | ||
} | ||
|
||
Error | ||
OpenAiInferInput::PrepareForRequest() | ||
{ | ||
// Reset position so request sends entire input. | ||
bufs_idx_ = 0; | ||
buf_pos_ = 0; | ||
return Error::Success; | ||
} | ||
|
||
Error | ||
OpenAiInferInput::GetNext( | ||
const uint8_t** buf, size_t* input_bytes, bool* end_of_input) | ||
{ | ||
if (bufs_idx_ < bufs_.size()) { | ||
*buf = bufs_[bufs_idx_]; | ||
*input_bytes = buf_byte_sizes_[bufs_idx_]; | ||
bufs_idx_++; | ||
} else { | ||
*buf = nullptr; | ||
*input_bytes = 0; | ||
} | ||
*end_of_input = (bufs_idx_ >= bufs_.size()); | ||
|
||
return Error::Success; | ||
} | ||
|
||
// Constructs an input with the given name, datatype and initial shape.
//
// The chunk cursors are explicitly zeroed here. Without this, 'bufs_idx_'
// would hold an indeterminate value and a GetNext() call issued before
// Reset() or PrepareForRequest() would read it.
//
// \param name The name forwarded to the InferInput base.
// \param dims The initial shape of the input.
// \param datatype The datatype string forwarded to the InferInput base.
OpenAiInferInput::OpenAiInferInput(
    const std::string& name, const std::vector<int64_t>& dims,
    const std::string& datatype)
    // NOTE(review): the base is tagged BackendKind::TENSORFLOW_SERVING, which
    // looks like a leftover from the backend this file was adapted from.
    // Confirm whether an OpenAI-specific BackendKind should be used instead.
    : InferInput(BackendKind::TENSORFLOW_SERVING, name, datatype), shape_(dims),
      bufs_idx_(0), buf_pos_(0)
{
}
|
||
}}}} // namespace triton::perfanalyzer::clientbackend::openai |
76 changes: 76 additions & 0 deletions
76
src/c++/perf_analyzer/client_backend/openai/openai_infer_input.h
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,76 @@ | ||
// Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. | ||
// | ||
// Redistribution and use in source and binary forms, with or without | ||
// modification, are permitted provided that the following conditions | ||
// are met: | ||
// * Redistributions of source code must retain the above copyright | ||
// notice, this list of conditions and the following disclaimer. | ||
// * Redistributions in binary form must reproduce the above copyright | ||
// notice, this list of conditions and the following disclaimer in the | ||
// documentation and/or other materials provided with the distribution. | ||
// * Neither the name of NVIDIA CORPORATION nor the names of its | ||
// contributors may be used to endorse or promote products derived | ||
// from this software without specific prior written permission. | ||
// | ||
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY | ||
// EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE | ||
// IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR | ||
// PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR | ||
// CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, | ||
// EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, | ||
// PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR | ||
// PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY | ||
// OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT | ||
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE | ||
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. | ||
#pragma once | ||
|
||
#include <string> | ||
|
||
#include "../../perf_utils.h" | ||
#include "../client_backend.h" | ||
|
||
|
||
namespace triton { namespace perfanalyzer { namespace clientbackend { | ||
namespace openai { | ||
|
||
//============================================================== | ||
/// OpenAiInferInput instance holds the information regarding | ||
/// model input tensors and their corresponding generated data. | ||
/// | ||
class OpenAiInferInput : public InferInput { | ||
public: | ||
static Error Create( | ||
InferInput** infer_input, const std::string& name, | ||
const std::vector<int64_t>& dims, const std::string& datatype); | ||
/// See InferInput::Shape() | ||
const std::vector<int64_t>& Shape() const override { return shape_; } | ||
/// See InferInput::SetShape() | ||
Error SetShape(const std::vector<int64_t>& shape) override; | ||
/// See InferInput::Reset() | ||
Error Reset() override; | ||
/// See InferInput::AppendRaw() | ||
Error AppendRaw(const uint8_t* input, size_t input_byte_size) override; | ||
/// Gets the size of data added into this input in bytes. | ||
/// \param byte_size The size of data added in bytes. | ||
/// \return Error object indicating success or failure. | ||
Error ByteSize(size_t* byte_size) const; | ||
/// Resets the heads to start providing data from the beginning. | ||
Error PrepareForRequest(); | ||
/// Get the next chunk of data if available. | ||
Error GetNext(const uint8_t** buf, size_t* input_bytes, bool* end_of_input); | ||
|
||
private: | ||
explicit OpenAiInferInput( | ||
const std::string& name, const std::vector<int64_t>& dims, | ||
const std::string& datatype); | ||
|
||
std::vector<int64_t> shape_; | ||
size_t byte_size_{0}; | ||
|
||
size_t bufs_idx_, buf_pos_; | ||
std::vector<const uint8_t*> bufs_; | ||
std::vector<size_t> buf_byte_sizes_; | ||
}; | ||
|
||
}}}} // namespace triton::perfanalyzer::clientbackend::openai |
Oops, something went wrong.