Skip to content

Commit

Permalink
Fix divide by zero and unit conversion for throughput display (#500)
Browse files Browse the repository at this point in the history
* Fix divide by zero

* Convert ns to secs for throughput metrics
  • Loading branch information
nv-hwoo authored and mc-nv committed Mar 13, 2024
1 parent e96b7a6 commit 9c4775d
Showing 1 changed file with 9 additions and 3 deletions.
12 changes: 9 additions & 3 deletions src/c++/perf_analyzer/genai-pa/genai_pa/llm_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -337,7 +337,8 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:

# request latencies
req_latency = res_timestamps[-1] - req_timestamp
request_latencies.append(req_latency)
request_latencies.append(req_latency) # nanosec
req_latency = req_latency / 1e9 # sec

# time to first token
time_to_first_tokens.append(res_timestamps[0] - req_timestamp)
Expand All @@ -351,10 +352,15 @@ def _parse_requests(self, requests: dict) -> LLMMetrics:

# inter token latency
for (t1, _), (t2, n2) in pairwise(zip(res_timestamps, num_output_tokens)):
inter_token_latencies.append(round((t2 - t1) / n2))
# TMA-1676: handle empty first/last responses
# If the latter response has zero tokens (e.g. an empty string),
# default its token count to one for the inter-token latency
# calculation, which avoids a divide by zero.
num_token = 1 if n2 == 0 else n2
inter_token_latencies.append(round((t2 - t1) / num_token))

# request throughput
benchmark_duration = max_res_timestamp - min_req_timestamp
benchmark_duration = (max_res_timestamp - min_req_timestamp) / 1e9 # nanosec -> sec
request_throughputs = [len(requests) / benchmark_duration]

return LLMMetrics(
Expand Down

0 comments on commit 9c4775d

Please sign in to comment.