Skip to content

Commit

Permalink
New measurement fields created.
Browse files Browse the repository at this point in the history
  • Loading branch information
nv-braf committed Oct 9, 2023
1 parent c9d467f commit cb0d632
Show file tree
Hide file tree
Showing 4 changed files with 201 additions and 0 deletions.
7 changes: 7 additions & 0 deletions model_analyzer/perf_analyzer/perf_analyzer.py
Original file line number Diff line number Diff line change
Expand Up @@ -36,6 +36,8 @@
)
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
from model_analyzer.record.record import Record
from model_analyzer.record.types.avg_first_token_latency import AvgFirstTokenLatency
from model_analyzer.record.types.avg_token_latency import AvgTokenLatency
from model_analyzer.record.types.gpu_free_memory import GPUFreeMemory
from model_analyzer.record.types.gpu_power_usage import GPUPowerUsage
from model_analyzer.record.types.gpu_used_memory import GPUUsedMemory
Expand Down Expand Up @@ -91,6 +93,11 @@ class PerfAnalyzer:
["gpu_used_memory", "Max GPU Memory Usage", GPUUsedMemory, "1000000"],
["gpu_free_memory", "Total GPU Memory", GPUFreeMemory, "1000000"]
]

# Table of LLM metrics. Each row follows the same four-element layout as the
# rows of the table above it: [key string, human-readable label, Record class,
# numeric string].
# NOTE(review): both rows use the same key "perf_latency_avg", whereas the
# rows visible in the preceding table each have a distinct key. This looks
# like a placeholder; confirm whether each LLM metric needs its own key.
# NOTE(review): the last column ("1000") is presumably a unit-conversion
# factor; verify against the code that consumes this table.
llm_metric_table = [
["perf_latency_avg", "Avg first token latency", AvgFirstTokenLatency, "1000"],
["perf_latency_avg", "Avg token latency", AvgTokenLatency, "1000"]
]
# yapf: enable

@staticmethod
Expand Down
96 changes: 96 additions & 0 deletions model_analyzer/record/types/avg_first_token_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.record import DecreasingRecord


@total_ordering
class AvgFirstTokenLatency(DecreasingRecord):
    """
    Record holding the average first token-to-token latency
    reported by perf_analyzer. Lower values are better, so
    ordering and subtraction are inverted accordingly.
    """

    tag = "avg_first_token_latency"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency extracted from the perf analyzer output
        timestamp : float
            Forwarded unchanged to the base record constructor
            (presumably the time of the measurement; confirm
            against the base class)
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Optional flag meant to indicate that the record
            was aggregated (max, min, average, etc.). It is
            accepted for interface compatibility but does not
            alter the returned text here.

        Returns
        -------
        str
            The full name of the metric.
        """

        metric_name = "avg first token-to-token latency (ms)"
        return metric_name

    def __eq__(self, other):
        """
        Two records are equal when their values are equal.
        """

        mine = self.value()
        theirs = other.value()
        return mine == theirs

    def __lt__(self, other):
        """
        This record is "less than" the other when its latency
        value is larger, since a lower latency is better.
        """

        mine = self.value()
        theirs = other.value()
        return mine > theirs

    def __add__(self, other):
        """
        Sum the values of two records and wrap the result
        in a brand new record of the same class.
        """

        total = self.value() + other.value()
        return type(self)(value=total)

    def __sub__(self, other):
        """
        Subtract two records, producing a brand new record
        of the same class.

        ** Note the operands are reversed (other - self)
        because of the inverted nature of latency
        (lower is better)
        """

        difference = other.value() - self.value()
        return type(self)(value=difference)
96 changes: 96 additions & 0 deletions model_analyzer/record/types/avg_token_latency.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
#!/usr/bin/env python3

# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.record import DecreasingRecord


@total_ordering
class AvgTokenLatency(DecreasingRecord):
    """
    Record holding the average token-to-token latency
    reported by perf_analyzer. Lower values are better, so
    ordering and subtraction are inverted accordingly.
    """

    tag = "avg_token_latency"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            The latency extracted from the perf analyzer output
        timestamp : float
            Forwarded unchanged to the base record constructor
            (presumably the time of the measurement; confirm
            against the base class)
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            Optional flag meant to indicate that the record
            was aggregated (max, min, average, etc.). It is
            accepted for interface compatibility but does not
            alter the returned text here.

        Returns
        -------
        str
            The full name of the metric.
        """

        metric_name = "avg token-to-token latency (ms)"
        return metric_name

    def __eq__(self, other):
        """
        Two records are equal when their values are equal.
        """

        mine = self.value()
        theirs = other.value()
        return mine == theirs

    def __lt__(self, other):
        """
        This record is "less than" the other when its latency
        value is larger, since a lower latency is better.
        """

        mine = self.value()
        theirs = other.value()
        return mine > theirs

    def __add__(self, other):
        """
        Sum the values of two records and wrap the result
        in a brand new record of the same class.
        """

        total = self.value() + other.value()
        return type(self)(value=total)

    def __sub__(self, other):
        """
        Subtract two records, producing a brand new record
        of the same class.

        ** Note the operands are reversed (other - self)
        because of the inverted nature of latency
        (lower is better)
        """

        difference = other.value() - self.value()
        return type(self)(value=difference)
2 changes: 2 additions & 0 deletions tests/test_record_types.py
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,8 @@ def setUp(self):
"perf_client_send_recv",
"perf_server_compute_input",
"gpu_power_usage",
"avg_first_token_latency",
"avg_token_latency",
]
}
self.more_is_better_types = {
Expand Down

0 comments on commit cb0d632

Please sign in to comment.