Do not reset token-to-token latencies list

triton-inference-server · Nov 8, 2023 · 9afc51e
1 parent 7afa27a
commit 9afc51e
Showing 1 changed file with 0 additions and 1 deletion.
diff --git a/src/c++/perf_analyzer/docs/examples/profile.py b/src/c++/perf_analyzer/docs/examples/profile.py
@@ -270,7 +270,6 @@ def collect_online_metrics(export_data, output_tokens):
         first_token_latencies.append(first_token_latency)
         generation_latencies.append(generation_latency_ms)
         generation_throughputs.append(output_tokens / generation_latency_s)
-        token_to_token_latencies = []
         for prev_res, res in pairwise(responses):
             token_to_token_latencies.append((res - prev_res) / 1_000_000)
     return (