Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

New Records for LLM metrics #839

Merged
merged 2 commits into from
Mar 19, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
60 changes: 60 additions & 0 deletions model_analyzer/record/types/inter_token_latency_avg.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase


@total_ordering
class InterTokenLatencyAvg(InterTokenLatencyBase):
    """
    Record for the average inter token latency
    metric reported by perf_analyzer.
    """

    # Identifier string for this record type.
    tag = "inter_token_latency_avg"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc. It is currently
            ignored by this implementation.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return "Avg Inter Token Latency (ms)"
74 changes: 74 additions & 0 deletions model_analyzer/record/types/inter_token_latency_base.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,74 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.record import DecreasingRecord


@total_ordering
class InterTokenLatencyBase(DecreasingRecord):
    """
    Shared base for the perf_analyzer inter token latency records.

    Supplies the comparison and arithmetic operators; the remaining
    rich comparisons are derived by ``total_ordering`` from
    ``__eq__`` and ``__lt__``.
    """

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    def __eq__(self, other):
        """
        Two records are equal when their values are equal.
        """

        mine = self.value()
        theirs = other.value()
        return mine == theirs

    def __lt__(self, other):
        """
        Ordering is inverted on purpose: this record is
        "less than" the other when its value is *larger*.
        """

        mine = self.value()
        theirs = other.value()
        return mine > theirs

    def __add__(self, other):
        """
        Returns a new record of the same class whose value is
        the sum of both values (timestamp left at its default).
        """

        total = self.value() + other.value()
        return self.__class__(value=total)

    def __sub__(self, other):
        """
        Returns a new record of the same class holding the
        difference of both values (timestamp left at its default).

        ** Note this does reverse subtraction (other - self)
        because of the inverted nature of latency (lower is better)
        """

        difference = other.value() - self.value()
        return self.__class__(value=difference)
60 changes: 60 additions & 0 deletions model_analyzer/record/types/inter_token_latency_max.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase


@total_ordering
class InterTokenLatencyMax(InterTokenLatencyBase):
    """
    Record for the maximum inter token latency
    metric reported by perf_analyzer.
    """

    # Identifier string for this record type.
    tag = "inter_token_latency_max"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc. It is currently
            ignored by this implementation.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        column_title = "Max Inter Token Latency (ms)"
        return column_title
60 changes: 60 additions & 0 deletions model_analyzer/record/types/inter_token_latency_min.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase


@total_ordering
class InterTokenLatencyMin(InterTokenLatencyBase):
    """
    Record for the minimum inter token latency
    metric reported by perf_analyzer.
    """

    # Identifier string for this record type.
    tag = "inter_token_latency_min"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc. It is currently
            ignored by this implementation.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        column_title = "Min Inter Token Latency (ms)"
        return column_title
60 changes: 60 additions & 0 deletions model_analyzer/record/types/inter_token_latency_p75.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase


@total_ordering
class InterTokenLatencyP75(InterTokenLatencyBase):
    """
    Record for the p75 inter token latency
    metric reported by perf_analyzer.
    """

    # Identifier string for this record type.
    tag = "inter_token_latency_p75"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc. It is currently
            ignored by this implementation.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        column_title = "p75 Inter Token Latency (ms)"
        return column_title
60 changes: 60 additions & 0 deletions model_analyzer/record/types/inter_token_latency_p90.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,60 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from functools import total_ordering

from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase


@total_ordering
class InterTokenLatencyP90(InterTokenLatencyBase):
    """
    Record for the p90 inter token latency
    metric reported by perf_analyzer.
    """

    # Identifier string for this record type; the class name,
    # tag and header all refer to the same (p90) percentile.
    tag = "inter_token_latency_p90"

    def __init__(self, value, timestamp=0):
        """
        Parameters
        ----------
        value : float
            the latency extracted from the perf analyzer output
        timestamp : float
            Elapsed time from start of program
        """

        super().__init__(value, timestamp)

    @classmethod
    def header(cls, aggregation_tag=False):
        """
        Parameters
        ----------
        aggregation_tag: bool
            An optional tag that may be displayed
            as part of the header indicating that
            this record has been aggregated using
            max, min or average etc. It is currently
            ignored by this implementation.

        Returns
        -------
        str
            The full name of the
            metric.
        """

        return "p90 Inter Token Latency (ms)"
Loading
Loading