Skip to content

Commit

Permalink
Extract llm metric calculation into new function
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-hwoo committed Jan 25, 2024
1 parent d86e830 commit c61d71a
Show file tree
Hide file tree
Showing 2 changed files with 13 additions and 2 deletions.
11 changes: 9 additions & 2 deletions src/c++/perf_analyzer/report_writer.cc
Original file line number Diff line number Diff line change
Expand Up @@ -409,6 +409,14 @@ ReportWriter::WriteGpuMetrics(std::ostream& ofs, const Metrics& metric)

void
ReportWriter::WriteLlmMetrics(std::ostream& ofs)
{
auto [avg_first_token_latency, avg_t2t_latency] = CalculateLlmMetrics();
ofs << "," << avg_first_token_latency;
ofs << "," << avg_t2t_latency;
}

std::tuple<double, double>
ReportWriter::CalculateLlmMetrics()
{
const std::vector<Experiment>& experiments{collector_->GetData()};
std::vector<double> first_token_latencies;
Expand Down Expand Up @@ -437,8 +445,7 @@ ReportWriter::WriteLlmMetrics(std::ostream& ofs)
std::reduce(t2t_latencies.begin(), t2t_latencies.end()) /
t2t_latencies.size();

ofs << "," << avg_first_token_latency;
ofs << "," << avg_t2t_latency;
return std::make_tuple(avg_first_token_latency, avg_t2t_latency);
}

}} // namespace triton::perfanalyzer
4 changes: 4 additions & 0 deletions src/c++/perf_analyzer/report_writer.h
Original file line number Diff line number Diff line change
Expand Up @@ -95,6 +95,10 @@ class ReportWriter {
const std::shared_ptr<ProfileDataCollector>& collector,
const bool should_output_llm_metrics);

/// Calculate LLM metrics (e.g., average first token latency) using the
/// profile data collected for a decoupled model.
///
/// Returns a tuple whose first element is the average first token latency
/// and whose second element is the average t2t latency.
std::tuple<double, double> CalculateLlmMetrics();


// NOTE(review): filename_ is a const reference whose default member
// initializer binds it to a temporary std::string built from "". This is
// only safe if every constructor initializes filename_ explicitly and the
// referenced string outlives this object -- confirm against the constructors.
const std::string& filename_{""};
// Defaults to true when a constructor does not set it.
const bool target_concurrency_{true};
Expand Down

0 comments on commit c61d71a

Please sign in to comment.