Skip to content

Commit

Permalink
Changing name to be avg_token_to_token_latency
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-braf committed Oct 12, 2023
1 parent 4f111a4 commit dbe845b
Show file tree
Hide file tree
Showing 3 changed files with 8 additions and 6 deletions.
8 changes: 5 additions & 3 deletions model_analyzer/perf_analyzer/perf_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,9 @@
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
from model_analyzer.record.record import Record
from model_analyzer.record.types.avg_first_token_latency import AvgFirstTokenLatency
from model_analyzer.record.types.avg_token_latency import AvgTokenLatency
from model_analyzer.record.types.avg_token_to_token_latency import (
AvgTokenToTokenLatency,
)
from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory
from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
Expand Down Expand Up @@ -95,8 +97,8 @@ class PerfAnalyzer:
]

llm_metric_table = [
["avg_first_latency", "Avg first token latency", AvgFirstTokenLatency, "1000"],
["avg_avg_latency", "Avg token latency", AvgTokenLatency, "1000"]
["avg_first_latency", None, AvgFirstTokenLatency, "1000"],
["avg_token_to_token_latency", None, AvgTokenToTokenLatency, "1000"]
]
# yapf: enable

Expand Down
4 changes: 2 additions & 2 deletions model_analyzer/record/types/avg_token_to_token_latency.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,12 +20,12 @@


@total_ordering
class AvgTokenLatency(DecreasingRecord):
class AvgTokenToTokenLatency(DecreasingRecord):
"""
A record for perf_analyzer avg token-to-token latency metric
"""

tag = "avg_token_latency"
tag = "avg_token_to_token_latency"

def __init__(self, value, timestamp=0):
"""
Expand Down
2 changes: 1 addition & 1 deletion tests/test_record_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ def setUp(self):
"perf_server_compute_input",
"gpu_power_usage",
"avg_first_token_latency",
"avg_token_latency",
"avg_token_to_token_latency",
]
}
self.more_is_better_types = {
Expand Down

0 comments on commit dbe845b

Please sign in to comment.