From 3df2487e9f405f43a045a30cbbe296ef59a0aa86 Mon Sep 17 00:00:00 2001 From: braf Date: Mon, 18 Mar 2024 17:08:46 +0000 Subject: [PATCH 1/2] Adding new LLM metrics --- .../record/types/inter_token_latency_avg.py | 96 ++++++++++++++++ .../record/types/inter_token_latency_max.py | 96 ++++++++++++++++ .../record/types/inter_token_latency_min.py | 96 ++++++++++++++++ .../record/types/inter_token_latency_p75.py | 96 ++++++++++++++++ .../record/types/inter_token_latency_p90.py | 96 ++++++++++++++++ .../record/types/inter_token_latency_p99.py | 96 ++++++++++++++++ .../record/types/output_token_throughput.py | 105 ++++++++++++++++++ .../record/types/time_to_first_token_avg.py | 96 ++++++++++++++++ .../record/types/time_to_first_token_max.py | 96 ++++++++++++++++ .../record/types/time_to_first_token_min.py | 96 ++++++++++++++++ .../record/types/time_to_first_token_p75.py | 96 ++++++++++++++++ .../record/types/time_to_first_token_p90.py | 96 ++++++++++++++++ .../record/types/time_to_first_token_p99.py | 96 ++++++++++++++++ tests/test_record_types.py | 13 +++ 14 files changed, 1270 insertions(+) create mode 100755 model_analyzer/record/types/inter_token_latency_avg.py create mode 100755 model_analyzer/record/types/inter_token_latency_max.py create mode 100755 model_analyzer/record/types/inter_token_latency_min.py create mode 100755 model_analyzer/record/types/inter_token_latency_p75.py create mode 100755 model_analyzer/record/types/inter_token_latency_p90.py create mode 100755 model_analyzer/record/types/inter_token_latency_p99.py create mode 100755 model_analyzer/record/types/output_token_throughput.py create mode 100755 model_analyzer/record/types/time_to_first_token_avg.py create mode 100755 model_analyzer/record/types/time_to_first_token_max.py create mode 100755 model_analyzer/record/types/time_to_first_token_min.py create mode 100755 model_analyzer/record/types/time_to_first_token_p75.py create mode 100755 model_analyzer/record/types/time_to_first_token_p90.py create 
mode 100755 model_analyzer/record/types/time_to_first_token_p99.py diff --git a/model_analyzer/record/types/inter_token_latency_avg.py b/model_analyzer/record/types/inter_token_latency_avg.py new file mode 100755 index 000000000..3810d1c7e --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_avg.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyAvg(DecreasingRecord): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_avg" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. 
+ """ + + return "Avg Inter Token Latency (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_max.py b/model_analyzer/record/types/inter_token_latency_max.py new file mode 100755 index 000000000..ffb3879fb --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_max.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyMax(DecreasingRecord): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_max" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Max Inter Token Latency (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_min.py b/model_analyzer/record/types/inter_token_latency_min.py new file mode 100755 index 000000000..3f6344bea --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_min.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyMin(DecreasingRecord): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_min" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Min Inter Token Latency (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_p75.py b/model_analyzer/record/types/inter_token_latency_p75.py new file mode 100755 index 000000000..c2ff01664 --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_p75.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyP75(DecreasingRecord): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p75" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p75 Inter Token Latency (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_p90.py b/model_analyzer/record/types/inter_token_latency_p90.py new file mode 100755 index 000000000..4f53c69fa --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_p90.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyP90(DecreasingRecord): + """ + A record for perf_analyzer p90 Inter token latency metric + """ + + tag = "inter_token_latency_p90" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p90 Inter Token Latency (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if this record + is less than the other; inverted, + so a larger value compares as less + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record.
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_p99.py b/model_analyzer/record/types/inter_token_latency_p99.py new file mode 100755 index 000000000..f203f78c2 --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_p99.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyP99(DecreasingRecord): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p99" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p99 Inter Token Latency (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/output_token_throughput.py b/model_analyzer/record/types/output_token_throughput.py new file mode 100755 index 000000000..f7edf7cb8 --- /dev/null +++ b/model_analyzer/record/types/output_token_throughput.py @@ -0,0 +1,105 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import IncreasingRecord + + +@total_ordering +class OutputTokenThroughput(IncreasingRecord): + """ + A record for perf_analyzer + metric 'Output Token Throughput' + """ + + tag = "output_token_throughput" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + The throughput from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + """ + Returns the function used to total a list of values + + Returns + ------- + The builtin sum function + """ + return sum + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Output Token Throughput (tokens/sec)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record.
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/model_analyzer/record/types/time_to_first_token_avg.py b/model_analyzer/record/types/time_to_first_token_avg.py new file mode 100755 index 000000000..d87642b1d --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_avg.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenAvg(DecreasingRecord): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_avg" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. 
+ + Returns + ------- + str + The full name of the + metric. + """ + + return "Avg Time To First Token (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_max.py b/model_analyzer/record/types/time_to_first_token_max.py new file mode 100755 index 000000000..d53741d7e --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_max.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenMax(DecreasingRecord): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_max" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Max Time To First Token (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_min.py b/model_analyzer/record/types/time_to_first_token_min.py new file mode 100755 index 000000000..ee556f6ba --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_min.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenMin(DecreasingRecord): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_min" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Min Time To First Token (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_p75.py b/model_analyzer/record/types/time_to_first_token_p75.py new file mode 100755 index 000000000..f996517e9 --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_p75.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenP75(DecreasingRecord): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p75" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p75 Time To First Token (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_p90.py b/model_analyzer/record/types/time_to_first_token_p90.py new file mode 100755 index 000000000..6009d06ba --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_p90.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenP90(DecreasingRecord): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p90" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p90 Time To First Token (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_p99.py b/model_analyzer/record/types/time_to_first_token_p99.py new file mode 100755 index 000000000..2302c82d7 --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_p99.py @@ -0,0 +1,96 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenP99(DecreasingRecord): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p99" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p99 Time To First Token (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. 
+ + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/tests/test_record_types.py b/tests/test_record_types.py index 4bd6d8b32..54c353200 100755 --- a/tests/test_record_types.py +++ b/tests/test_record_types.py @@ -49,6 +49,18 @@ def setUp(self): "perf_latency_p90", "perf_latency_p95", "perf_latency_p99", + "inter_token_latency_min", + "inter_token_latency_max", + "inter_token_latency_avg", + "inter_token_latency_p75", + "inter_token_latency_p90", + "inter_token_latency_p99", + "time_to_first_token_min", + "time_to_first_token_max", + "time_to_first_token_avg", + "time_to_first_token_p75", + "time_to_first_token_p90", + "time_to_first_token_p99", "gpu_used_memory", "cpu_used_ram", "perf_server_compute_infer", @@ -65,6 +77,7 @@ def setUp(self): record_types[k] for k in [ "perf_throughput", + "output_token_throughput", "gpu_free_memory", "gpu_utilization", "cpu_available_ram", From 8a52aed690a06202347bb8b180d8c9c6f6564062 Mon Sep 17 00:00:00 2001 From: braf Date: Tue, 19 Mar 2024 16:27:08 +0000 Subject: [PATCH 2/2] Adding base class for perf, inter_token, and time_to_first latency records --- .../record/types/inter_token_latency_avg.py | 40 +--------- .../record/types/inter_token_latency_base.py | 74 +++++++++++++++++++ .../record/types/inter_token_latency_max.py | 40 +--------- .../record/types/inter_token_latency_min.py | 40 +--------- .../record/types/inter_token_latency_p75.py | 40 +--------- .../record/types/inter_token_latency_p90.py | 40 +--------- .../record/types/inter_token_latency_p99.py | 40 +--------- .../record/types/perf_latency_avg.py | 40 +--------- .../record/types/perf_latency_base.py | 74 +++++++++++++++++++ .../record/types/perf_latency_p90.py | 40 +--------- .../record/types/perf_latency_p95.py | 40 +--------- .../record/types/perf_latency_p99.py | 40 +--------- .../record/types/time_to_first_token_avg.py | 40 
+--------- .../record/types/time_to_first_token_base.py | 74 +++++++++++++++++++ .../record/types/time_to_first_token_max.py | 40 +--------- .../record/types/time_to_first_token_min.py | 40 +--------- .../record/types/time_to_first_token_p75.py | 40 +--------- .../record/types/time_to_first_token_p90.py | 40 +--------- .../record/types/time_to_first_token_p99.py | 40 +--------- 19 files changed, 254 insertions(+), 608 deletions(-) create mode 100755 model_analyzer/record/types/inter_token_latency_base.py create mode 100755 model_analyzer/record/types/perf_latency_base.py create mode 100755 model_analyzer/record/types/time_to_first_token_base.py diff --git a/model_analyzer/record/types/inter_token_latency_avg.py b/model_analyzer/record/types/inter_token_latency_avg.py index 3810d1c7e..fe1dc7dfb 100755 --- a/model_analyzer/record/types/inter_token_latency_avg.py +++ b/model_analyzer/record/types/inter_token_latency_avg.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase @total_ordering -class InterTokenLatencyAvg(DecreasingRecord): +class InterTokenLatencyAvg(InterTokenLatencyBase): """ A record for perf_analyzer Inter token latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Avg Inter Token Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_base.py b/model_analyzer/record/types/inter_token_latency_base.py new file mode 100755 index 000000000..dda70cefa --- /dev/null +++ b/model_analyzer/record/types/inter_token_latency_base.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyBase(DecreasingRecord): + """ + A base class for perf_analyzer Inter token latency metric + """ + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record.
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_max.py b/model_analyzer/record/types/inter_token_latency_max.py index ffb3879fb..ce2484144 100755 --- a/model_analyzer/record/types/inter_token_latency_max.py +++ b/model_analyzer/record/types/inter_token_latency_max.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase @total_ordering -class InterTokenLatencyMax(DecreasingRecord): +class InterTokenLatencyMax(InterTokenLatencyBase): """ A record for perf_analyzer Inter token latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Max Inter Token Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_min.py b/model_analyzer/record/types/inter_token_latency_min.py index 3f6344bea..21e44883b 100755 --- a/model_analyzer/record/types/inter_token_latency_min.py +++ b/model_analyzer/record/types/inter_token_latency_min.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase @total_ordering -class InterTokenLatencyMin(DecreasingRecord): +class InterTokenLatencyMin(InterTokenLatencyBase): """ A record for perf_analyzer Inter token latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Min Inter Token Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_p75.py b/model_analyzer/record/types/inter_token_latency_p75.py index c2ff01664..1234306fd 100755 --- a/model_analyzer/record/types/inter_token_latency_p75.py +++ b/model_analyzer/record/types/inter_token_latency_p75.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase @total_ordering -class InterTokenLatencyP75(DecreasingRecord): +class InterTokenLatencyP75(InterTokenLatencyBase): """ A record for perf_analyzer Inter token latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p75 Inter Token Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_p90.py b/model_analyzer/record/types/inter_token_latency_p90.py index 4f53c69fa..58ae0ccb4 100755 --- a/model_analyzer/record/types/inter_token_latency_p90.py +++ b/model_analyzer/record/types/inter_token_latency_p90.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase @total_ordering -class InterTokenLatencyP99(DecreasingRecord): +class InterTokenLatencyP90(InterTokenLatencyBase): """ A record for perf_analyzer Inter token latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p90 Inter Token Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record.
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/inter_token_latency_p99.py b/model_analyzer/record/types/inter_token_latency_p99.py index f203f78c2..d9f722772 100755 --- a/model_analyzer/record/types/inter_token_latency_p99.py +++ b/model_analyzer/record/types/inter_token_latency_p99.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.inter_token_latency_base import InterTokenLatencyBase @total_ordering -class InterTokenLatencyP99(DecreasingRecord): +class InterTokenLatencyP99(InterTokenLatencyBase): """ A record for perf_analyzer Inter token latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p99 Inter Token Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/perf_latency_avg.py b/model_analyzer/record/types/perf_latency_avg.py index 5452c0b79..aafbcbeb2 100755 --- a/model_analyzer/record/types/perf_latency_avg.py +++ b/model_analyzer/record/types/perf_latency_avg.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.perf_latency_base import PerfLatencyBase @total_ordering -class PerfLatencyAvg(DecreasingRecord): +class PerfLatencyAvg(PerfLatencyBase): """ A record for perf_analyzer latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Avg Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/perf_latency_base.py b/model_analyzer/record/types/perf_latency_base.py new file mode 100755 index 000000000..3c3e76cac --- /dev/null +++ b/model_analyzer/record/types/perf_latency_base.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class PerfLatencyBase(DecreasingRecord): + """ + A base class for perf_analyzer latency metric + """ + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. 
+ + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/perf_latency_p90.py b/model_analyzer/record/types/perf_latency_p90.py index c6718fe40..7eafa3b28 100755 --- a/model_analyzer/record/types/perf_latency_p90.py +++ b/model_analyzer/record/types/perf_latency_p90.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.perf_latency_base import PerfLatencyBase @total_ordering -class PerfLatencyP90(DecreasingRecord): +class PerfLatencyP90(PerfLatencyBase): """ A record for perf_analyzer latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p90 Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/perf_latency_p95.py b/model_analyzer/record/types/perf_latency_p95.py index 84ed9e648..ccb9f8c01 100755 --- a/model_analyzer/record/types/perf_latency_p95.py +++ b/model_analyzer/record/types/perf_latency_p95.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.perf_latency_base import PerfLatencyBase @total_ordering -class PerfLatencyP95(DecreasingRecord): +class PerfLatencyP95(PerfLatencyBase): """ A record for perf_analyzer latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p95 Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/perf_latency_p99.py b/model_analyzer/record/types/perf_latency_p99.py index af4d06da4..46d352021 100755 --- a/model_analyzer/record/types/perf_latency_p99.py +++ b/model_analyzer/record/types/perf_latency_p99.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.perf_latency_base import PerfLatencyBase @total_ordering -class PerfLatencyP99(DecreasingRecord): +class PerfLatencyP99(PerfLatencyBase): """ A record for perf_analyzer latency metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p99 Latency (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_avg.py b/model_analyzer/record/types/time_to_first_token_avg.py index d87642b1d..28da5d294 100755 --- a/model_analyzer/record/types/time_to_first_token_avg.py +++ b/model_analyzer/record/types/time_to_first_token_avg.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase @total_ordering -class TimeToFirstTokenAvg(DecreasingRecord): +class TimeToFirstTokenAvg(TimeToFirstTokenBase): """ A record for perf_analyzer Time to first token metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Avg Time To First Token (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_base.py b/model_analyzer/record/types/time_to_first_token_base.py new file mode 100755 index 000000000..5ef6e9070 --- /dev/null +++ b/model_analyzer/record/types/time_to_first_token_base.py @@ -0,0 +1,74 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from model_analyzer.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenBase(DecreasingRecord): + """ + A base class record for perf_analyzer time to first token metric + """ + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. 
+ """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_max.py b/model_analyzer/record/types/time_to_first_token_max.py index d53741d7e..f9ccc0a52 100755 --- a/model_analyzer/record/types/time_to_first_token_max.py +++ b/model_analyzer/record/types/time_to_first_token_max.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase @total_ordering -class TimeToFirstTokenMax(DecreasingRecord): +class TimeToFirstTokenMax(TimeToFirstTokenBase): """ A record for perf_analyzer Time to first token metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Max Time To First Token (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_min.py b/model_analyzer/record/types/time_to_first_token_min.py index ee556f6ba..4cc563c86 100755 --- a/model_analyzer/record/types/time_to_first_token_min.py +++ b/model_analyzer/record/types/time_to_first_token_min.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase @total_ordering -class TimeToFirstTokenMin(DecreasingRecord): +class TimeToFirstTokenMin(TimeToFirstTokenBase): """ A record for perf_analyzer Time to first token metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "Min Time To First Token (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_p75.py b/model_analyzer/record/types/time_to_first_token_p75.py index f996517e9..042972368 100755 --- a/model_analyzer/record/types/time_to_first_token_p75.py +++ b/model_analyzer/record/types/time_to_first_token_p75.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase @total_ordering -class TimeToFirstTokenP75(DecreasingRecord): +class TimeToFirstTokenP75(TimeToFirstTokenBase): """ A record for perf_analyzer Time to first token metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p75 Time To First Token (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_p90.py b/model_analyzer/record/types/time_to_first_token_p90.py index 6009d06ba..853adbdb4 100755 --- a/model_analyzer/record/types/time_to_first_token_p90.py +++ b/model_analyzer/record/types/time_to_first_token_p90.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase @total_ordering -class TimeToFirstTokenP90(DecreasingRecord): +class TimeToFirstTokenP90(TimeToFirstTokenBase): """ A record for perf_analyzer Time to first token metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p90 Time To First Token (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. 
- - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value())) diff --git a/model_analyzer/record/types/time_to_first_token_p99.py b/model_analyzer/record/types/time_to_first_token_p99.py index 2302c82d7..24f2ff088 100755 --- a/model_analyzer/record/types/time_to_first_token_p99.py +++ b/model_analyzer/record/types/time_to_first_token_p99.py @@ -16,11 +16,11 @@ from functools import total_ordering -from model_analyzer.record.record import DecreasingRecord +from model_analyzer.record.types.time_to_first_token_base import TimeToFirstTokenBase @total_ordering -class TimeToFirstTokenP99(DecreasingRecord): +class TimeToFirstTokenP99(TimeToFirstTokenBase): """ A record for perf_analyzer Time to first token metric """ @@ -58,39 +58,3 @@ def header(cls, aggregation_tag=False): """ return "p99 Time To First Token (ms)" - - def __eq__(self, other): - """ - Allows checking for - equality between two records - """ - - return self.value() == other.value() - - def __lt__(self, other): - """ - Allows checking if - this record is less than - the other - """ - - return self.value() > other.value() - - def __add__(self, other): - """ - Allows adding two records together - to produce a brand new record. - """ - - return self.__class__(value=(self.value() + other.value())) - - def __sub__(self, other): - """ - Allows subbing two records together - to produce a brand new record. - - ** Note this does reverse subtraction because - of the inverted nature of latency (lower is better) - """ - - return self.__class__(value=(other.value() - self.value()))