Add GenerateReport testcase
nv-hwoo committed Feb 9, 2024
1 parent: 3579696 · commit: d50edd0
Showing 3 changed files with 161 additions and 30 deletions.
src/c++/perf_analyzer/CMakeLists.txt (1 addition, 0 deletions)
@@ -204,6 +204,7 @@ add_executable(
  mock_sequence_manager.h
  mock_profile_data_collector.h
  mock_profile_data_exporter.h
+  mock_report_writer.h
  test_dataloader.cc
  test_inference_profiler.cc
  test_command_line_parser.cc
src/c++/perf_analyzer/report_writer.h (2 additions, 2 deletions)
@@ -36,7 +36,7 @@
namespace triton { namespace perfanalyzer {

#ifndef DOCTEST_CONFIG_DISABLE
-class TestReportWriter;
+class NaggyMockReportWriter;
#endif

//==============================================================================
@@ -116,7 +116,7 @@ class ReportWriter {
  const bool should_output_llm_metrics_{false};

#ifndef DOCTEST_CONFIG_DISABLE
-  friend TestReportWriter;
+  friend NaggyMockReportWriter;

 public:
  ReportWriter() = default;
src/c++/perf_analyzer/test_report_writer.cc (158 additions, 28 deletions)
@@ -27,35 +27,17 @@
#include <string>

#include "doctest.h"
#include "mock_model_parser.h"
#include "mock_report_writer.h"
#include "profile_data_collector.h"
#include "report_writer.h"
#include "request_record.h"

namespace triton { namespace perfanalyzer {

-class TestReportWriter : ReportWriter {
- public:
-  TestReportWriter() = default;
-  TestReportWriter(const std::shared_ptr<ProfileDataCollector>& collector)
-      : ReportWriter(
-            "", false, std::vector<pa::PerfStatus>{}, false, false, 0, nullptr,
-            false, collector, true)
-  {
-  }
-  void WriteGPUMetrics(std::ostream& ofs, const Metrics& metrics)
-  {
-    ReportWriter::WriteGPUMetrics(ofs, metrics);
-  }
-
-  void WriteLLMMetrics(std::ostream& ofs, PerfStatus& status)
-  {
-    ReportWriter::WriteLLMMetrics(ofs, status);
-  }
-};

TEST_CASE("testing WriteGPUMetrics")
TEST_CASE("report_writer: WriteGPUMetrics")
{
-  TestReportWriter trw{};
+  MockReportWriter mrw{};
Metrics m{};
m.gpu_utilization_per_gpu["a"] = 1.0;
m.gpu_power_usage_per_gpu["a"] = 2.2;
@@ -65,15 +47,15 @@ TEST_CASE("testing WriteGPUMetrics")

SUBCASE("single gpu complete output")
{
-    trw.WriteGPUMetrics(actual_output, m);
+    mrw.WriteGPUMetrics(actual_output, m);
const std::string expected_output{",a:1;,a:2.2;,a:3;,a:4;"};
CHECK(actual_output.str() == expected_output);
}

SUBCASE("single gpu missing data")
{
m.gpu_power_usage_per_gpu.erase("a");
-    trw.WriteGPUMetrics(actual_output, m);
+    mrw.WriteGPUMetrics(actual_output, m);
const std::string expected_output{",a:1;,,a:3;,a:4;"};
CHECK(actual_output.str() == expected_output);
}
@@ -87,7 +69,7 @@ TEST_CASE("testing WriteGPUMetrics")

SUBCASE("multi gpu complete output")
{
-    trw.WriteGPUMetrics(actual_output, m);
+    mrw.WriteGPUMetrics(actual_output, m);
const std::string expected_output{
",a:1;z:100;,a:2.2;z:222.2;,a:3;z:45;,a:4;z:89;"};
CHECK(actual_output.str() == expected_output);
@@ -97,7 +79,7 @@ TEST_CASE("testing WriteGPUMetrics")
{
m.gpu_utilization_per_gpu.erase("z");
m.gpu_power_usage_per_gpu.erase("a");
-    trw.WriteGPUMetrics(actual_output, m);
+    mrw.WriteGPUMetrics(actual_output, m);
const std::string expected_output{",a:1;,z:222.2;,a:3;z:45;,a:4;z:89;"};
CHECK(actual_output.str() == expected_output);
}
@@ -250,9 +232,157 @@ TEST_CASE("report_writer: WriteLLMMetrics")
status.concurrency = infer_mode.concurrency;
status.request_rate = infer_mode.request_rate;
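+
+  // MockReportWriter exposes the underlying collector_ member (it builds on
+  // the friend NaggyMockReportWriter declared in report_writer.h above), so
+  // the test can inject the collector directly.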

-  TestReportWriter trw(collector);
-  trw.WriteLLMMetrics(actual_output, status);
+  MockReportWriter mrw;
+  mrw.collector_ = collector;
+  mrw.WriteLLMMetrics(actual_output, status);
CHECK(actual_output.str() == expected_output);
}

TEST_CASE("report_writer: GenerateReport")
{
std::string filename{"temp.csv"};
std::vector<PerfStatus> summary;
std::shared_ptr<ModelParser> mmp{nullptr};
std::shared_ptr<ProfileDataCollector> collector{nullptr};
CHECK_NOTHROW_MESSAGE(
pa::ProfileDataCollector::Create(&collector),
"failed to create profile data collector");

// default parameters
bool target_concurrency{true};
bool verbose_csv{false};
bool include_server_stats{false};
int32_t percentile{90};
bool should_output_metrics{false};
bool should_output_llm_metrics{false};
bool is_sequence_model{false};
bool is_decoupled_model{false};

std::ostringstream actual_output;
std::string expected_output;

SUBCASE("single experiment")
{
mmp = std::make_shared<MockModelParser>(
is_sequence_model, is_decoupled_model);

ClientSideStats css;
css.infer_per_sec = 150.123;
css.avg_send_time_ns = 2000;
css.avg_receive_time_ns = 3000;

PerfStatus ps;
ps.concurrency = 10;
ps.client_stats = css;

summary.push_back(ps);
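+
+    // Client Send and Client Recv are reported in microseconds, so
+    // avg_send_time_ns = 2000 and avg_receive_time_ns = 3000 appear
+    // below as "2" and "3".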

+    expected_output =
+        "Concurrency,Inferences/Second,Client Send,Client "
+        "Recv\n"
+        "10,150.123,2,3\n";
+  }

SUBCASE("multiple LLM experiments")
{
// set parameters
is_decoupled_model = true;
should_output_llm_metrics = true;

mmp = std::make_shared<MockModelParser>(
is_sequence_model, is_decoupled_model);

// first experiment
ClientSideStats css1;
css1.infer_per_sec = 150;
css1.responses_per_sec = 123.456;
css1.avg_send_time_ns = 2000;
css1.avg_receive_time_ns = 3000;

PerfStatus ps1;
ps1.concurrency = 10;
ps1.client_stats = css1;

summary.push_back(ps1);
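+
+    // Each RequestRecord pairs one request timestamp with its response
+    // timestamps; the LLM latency columns in the report are computed
+    // from these values.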

+    InferenceLoadMode infer_mode1{ps1.concurrency, ps1.request_rate};
+    uint64_t sequence_id1{123};
+    uint64_t request_timestamp1{1};
+    std::vector<uint64_t> response_timestamps1{4, 5, 8, 10};
+    RequestRecord rr1 = GenerateRequestRecord(
+        sequence_id1, request_timestamp1, response_timestamps1);
+
+    uint64_t sequence_id2{456};
+    uint64_t request_timestamp2{2};
+    std::vector<uint64_t> response_timestamps2{6, 7, 10, 12};
+    RequestRecord rr2 = GenerateRequestRecord(
+        sequence_id2, request_timestamp2, response_timestamps2);
+
+    std::vector<RequestRecord> request_records1{rr1, rr2};
+    collector->AddData(infer_mode1, std::move(request_records1));
+
+    // second experiment
+    ClientSideStats css2;
+    css2.infer_per_sec = 345.12;
+    css2.responses_per_sec = 10.789;
+    css2.avg_send_time_ns = 4000;
+    css2.avg_receive_time_ns = 5000;
+
+    PerfStatus ps2;
+    ps2.concurrency = 30;
+    ps2.client_stats = css2;
+
+    summary.push_back(ps2);
+
+    InferenceLoadMode infer_mode2{ps2.concurrency, ps2.request_rate};
+    uint64_t sequence_id3{123};
+    uint64_t request_timestamp3{1};
+    std::vector<uint64_t> response_timestamps3{5, 8, 9, 11};
+    RequestRecord rr3 = GenerateRequestRecord(
+        sequence_id3, request_timestamp3, response_timestamps3);
+
+    uint64_t sequence_id4{456};
+    uint64_t request_timestamp4{2};
+    std::vector<uint64_t> response_timestamps4{10, 15, 19, 22};
+    RequestRecord rr4 = GenerateRequestRecord(
+        sequence_id4, request_timestamp4, response_timestamps4);
+
+    std::vector<RequestRecord> request_records2{rr3, rr4};
+    collector->AddData(infer_mode2, std::move(request_records2));
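+
+    // Expected LLM columns, derived from the timestamps above:
+    //   experiment 1: first-token latencies (4-1) and (6-2) average to 3.5;
+    //                 token-to-token gaps {1,3,2} and {1,3,2} average to 2.
+    //   experiment 2: first-token latencies (5-1) and (10-2) average to 6;
+    //                 token-to-token gaps {3,1,2} and {5,4,3} average to 3.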

+    expected_output =
+        "Concurrency,Inferences/Second,Response Throughput,Client Send,Client "
+        "Recv,Avg First Token Latency,Avg Token-to-Token Latency\n"
+        "10,150,123.456,2,3,3.5,2\n"
+        "30,345.12,10.789,4,5,6,3\n";
+  }

+  MockReportWriter mrw{
+      filename,
+      summary,
+      mmp,
+      collector,
+      target_concurrency,
+      verbose_csv,
+      include_server_stats,
+      percentile,
+      should_output_metrics,
+      should_output_llm_metrics,
+  };
+  mrw.GenerateReport();
+
+  // read from temp.csv
+  std::ifstream input_file(filename);
+  std::string line;
+  while (std::getline(input_file, line)) {
+    actual_output << line << "\n";
+  }
+  input_file.close();
+
+  CHECK(actual_output.str() == expected_output);
+
+  // clean up
+  std::remove(filename.c_str());
+}

}} // namespace triton::perfanalyzer
