diff --git a/genai-perf/genai_perf/measurements/model_config_measurement.py b/genai-perf/genai_perf/measurements/model_config_measurement.py new file mode 100644 index 00000000..bf157b56 --- /dev/null +++ b/genai-perf/genai_perf/measurements/model_config_measurement.py @@ -0,0 +1,234 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + + +from copy import deepcopy +from dataclasses import dataclass +from functools import total_ordering +from statistics import mean +from typing import Any, Dict, Optional, TypeAlias + +from genai_perf.record.record import Record + +Records: TypeAlias = Dict[str, Record] +MetricObjectives: TypeAlias = Dict[str, float] + + +@dataclass(frozen=True) +class ModelConfigMeasurementDefaults: + METRIC_WEIGHTING = {"perf_throughput": 1.0} + + SELF_IS_BETTER = 1 + OTHER_IS_BETTER = -1 + EQUALIVILENT = 0 + + COMPARISON_SCORE_THRESHOLD = 0 + + +@total_ordering +class ModelConfigMeasurement: + """ + Encapsulates the set of performance metrics (measurements) obtained when profiling a model + """ + + def __init__(self, perf_metrics: Records): + """ + perf_metrics: + Metrics (stored in the Record class) that are associated with how the model + performed. Examples include throughput and latency. + """ + + self._perf_metrics = perf_metrics + + # Set a default metric weighting + self._metric_weights = ModelConfigMeasurementDefaults.METRIC_WEIGHTING + + ########################################################################### + # Accessor Methods + ########################################################################### + def get_perf_metrics(self) -> Records: + return self._perf_metrics + + def get_perf_metric(self, name: str) -> Optional[Record]: + return self._perf_metrics[name] if name in self._perf_metrics else None + + def get_perf_metric_value(self, name: str, return_value: int = 0) -> Any: + metric = self.get_perf_metric(name) + return metric.value() if metric else return_value + + def get_weighted_score(self, other: "ModelConfigMeasurement") -> float: + """ + Returns the weighted score between this MCM and the + provided MCM + """ + return self._calculate_weighted_score(other) + + def set_metric_weighting(self, metric_objectives: MetricObjectives) -> None: + """ + Sets the metric weighting for this measurement based + on the objectives + """ + + # Each individual weighting is based on it's percentage of the total + # weighting. 
Example: {A: 1, B: 3} would be stored as {A: 0.25, B: 0.75} + self._metric_weights = { + objective: (value / sum(metric_objectives.values())) + for objective, value in metric_objectives.items() + } + + ########################################################################### + # Checkpoint Methods + ########################################################################### + def write_to_checkpoint(self) -> Dict[str, Any]: + """ + Converts the class data into a dictionary that can be written to + the checkpoint file + """ + mcm_dict = deepcopy(self.__dict__) + + # Values based solely on user/config settings (that can vary from run to run) + # are not stored in the checkpoint + del mcm_dict["_metric_weights"] + + return mcm_dict + + @classmethod + def read_from_checkpoint(cls, mcm_dict: Dict[str, Any]) -> "ModelConfigMeasurement": + """ + Takes the checkpoint's representation of the class and creates (and populates) + a new instance of a MCM + """ + perf_metrics = cls._read_perf_metrics_from_checkpoint(mcm_dict["_perf_metrics"]) + + mcm = ModelConfigMeasurement(perf_metrics) + + return mcm + + @classmethod + def _read_perf_metrics_from_checkpoint( + cls, perf_metrics_dict: Dict[str, Any] + ) -> Records: + perf_metrics: Records = {} + + for [tag, record_dict] in perf_metrics_dict.values(): + record = Record.get(tag) + record = record.read_from_checkpoint(record_dict) # type: ignore + perf_metrics[tag] = record # type: ignore + + return perf_metrics + + ########################################################################### + # Comparison Methods + ########################################################################### + def is_better_than(self, other: "ModelConfigMeasurement") -> bool: + return ( + self._compare_measurements(other) + == ModelConfigMeasurementDefaults.SELF_IS_BETTER + ) + + def __lt__(self, other: "ModelConfigMeasurement") -> bool: + return ( + self._compare_measurements(other) + == ModelConfigMeasurementDefaults.OTHER_IS_BETTER + ) + + def __gt__(self, other: "ModelConfigMeasurement") -> bool: + return ( + self._compare_measurements(other) + == ModelConfigMeasurementDefaults.SELF_IS_BETTER + ) + + def __eq__(self, other: "ModelConfigMeasurement") -> bool: # type: ignore + return ( + self._compare_measurements(other) + == ModelConfigMeasurementDefaults.EQUALIVILENT + ) + + def _compare_measurements(self, other: "ModelConfigMeasurement") -> int: + """ + Compares two MCMs + based on the weighted metric objectives + """ + weighted_score = self._calculate_weighted_score(other) + + if weighted_score > ModelConfigMeasurementDefaults.COMPARISON_SCORE_THRESHOLD: + return ModelConfigMeasurementDefaults.SELF_IS_BETTER + elif ( + weighted_score < -ModelConfigMeasurementDefaults.COMPARISON_SCORE_THRESHOLD + ): + return ModelConfigMeasurementDefaults.OTHER_IS_BETTER + else: + return ModelConfigMeasurementDefaults.EQUALIVILENT + + ########################################################################### + # Calculation Methods + ########################################################################### + def _calculate_weighted_score(self, other: "ModelConfigMeasurement") -> float: + """ + Calculates the weighted score between two + ModelConfig measurements based on the weighted + metric objectives + + A positive value indicates this MCM is better than the other + """ + + weighted_score = 0.0 + for objective, weight in self._metric_weights.items(): + self_metric = self.get_perf_metric(objective) + other_metric = other.get_perf_metric(objective) + + # This handles the case 
where metric(s) do not exist + if self_metric and other_metric is None: + return ModelConfigMeasurementDefaults.SELF_IS_BETTER + elif other_metric and self_metric is None: + return ModelConfigMeasurementDefaults.OTHER_IS_BETTER + elif self_metric is None and other_metric is None: + return ModelConfigMeasurementDefaults.EQUALIVILENT + + metric_diff = self_metric - other_metric # type: ignore + average = mean([self_metric.value(), other_metric.value()]) # type: ignore + weighted_score += weight * (metric_diff.value() / average) + + return weighted_score + + def calculate_weighted_percentage_gain( + self, other: "ModelConfigMeasurement" + ) -> float: + """ + Calculates the weighted percentage between two + ModelConfig measurements based on the weighted + metric objectives + + The weighted percentage gain. A positive value indicates + this MCM is better than the other + """ + + weighted_pct = 0.0 + for objective, weight in self._metric_weights.items(): + self_metric = self.get_perf_metric(objective) + other_metric = other.get_perf_metric(objective) + + # This handles the case where metric(s) do not exist + if self_metric and other_metric is None: + return 100 * ModelConfigMeasurementDefaults.SELF_IS_BETTER + elif other_metric and self_metric is None: + return 100 * ModelConfigMeasurementDefaults.OTHER_IS_BETTER + elif self_metric is None and other_metric is None: + return 100 * ModelConfigMeasurementDefaults.EQUALIVILENT + + metric_pct = self_metric.calculate_percentage_gain(other_metric) # type: ignore + + weighted_pct += metric_pct * weight + + return weighted_pct diff --git a/genai-perf/genai_perf/record/gpu_record.py b/genai-perf/genai_perf/record/gpu_record.py new file mode 100644 index 00000000..1eb95398 --- /dev/null +++ b/genai-perf/genai_perf/record/gpu_record.py @@ -0,0 +1,66 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from .record import Record + + +class GPURecord(Record): + """ + This is a base class for any + GPU based record + """ + + def __init__(self, value: float, device_uuid: str = "", timestamp: int = 0): + """ + Parameters + ---------- + value : float + The value of the GPU metrtic + device_uuid : str + The GPU device uuid this metric is associated + with. 
+ timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, timestamp) + self._device_uuid = device_uuid + + def device_uuid(self) -> str: + """ + Returns + ------- + str + uuid for the GPU that this metric was sampled on + """ + + return self._device_uuid + + @classmethod + def read_from_checkpoint(cls, record_dict) -> "Record": + record = cls(0) + for key in ["_value", "_timestamp", "_device"]: + if key in record_dict: + setattr(record, key, record_dict[key]) + return record + + +class IncreasingGPURecord(GPURecord): + def _positive_is_better(self) -> bool: + return True + + +class DecreasingGPURecord(GPURecord): + def _positive_is_better(self) -> bool: + return False diff --git a/genai-perf/genai_perf/record/record.py b/genai-perf/genai_perf/record/record.py new file mode 100644 index 00000000..8cb17cba --- /dev/null +++ b/genai-perf/genai_perf/record/record.py @@ -0,0 +1,274 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import importlib +import os +from abc import ABCMeta, abstractmethod +from statistics import mean +from typing import Dict, Union + +from genai_perf.exceptions import GenAIPerfException + + +class RecordType(ABCMeta): + """ + A metaclass that holds the instantiated Record types + """ + + record_types: Dict[str, "RecordType"] = {} + + def __new__(cls, name, base, namespace): + """ + This function is called upon declaration of any classes of type + RecordType + """ + + record_type = super().__new__(cls, name, base, namespace) + + # If record_type.tag is a string, register it here + if isinstance(record_type.tag, str): + cls.record_types[record_type.tag] = record_type + return record_type + + @classmethod + def get(cls, tag: str) -> "RecordType": + """ + Parameters + ---------- + tag : str + tag that a record type has registered it classname with + + Returns + ------- + The class of type RecordType corresponding to the tag + """ + + if tag not in cls.record_types: + try: + importlib.import_module("genai_perf.record.types.%s" % tag) + except ImportError as e: + print(e) + return cls.record_types[tag] + + @classmethod + def get_all_record_types(cls) -> Dict[str, "RecordType"]: + """ + Returns + ------- + dict + keys are tags and values are + all the types that have this as a + metaclass + """ + + type_module_directory = os.path.join( + globals()["__spec__"].origin.rsplit("/", 1)[0], "types" + ) + for filename in os.listdir(type_module_directory): + if filename != "__init__.py" and filename.endswith(".py"): + try: + importlib.import_module(f"genai_perf.record.types.{filename[:-3]}") + except AttributeError: + raise GenAIPerfException("Error retrieving all record types") + return cls.record_types + + +class Record(metaclass=RecordType): + """ + This class is used for representing + records + """ + + def __init__(self, value: Union[float, int], timestamp: int): + """ + Parameters + ---------- + value : float or int + The value of the GPU metric + timestamp : int + 
The timestamp for the record in nanoseconds + """ + + assert type(value) is float or type(value) is int + assert type(timestamp) is int + + self._value = value + self._timestamp = timestamp + + @staticmethod + def aggregation_function(): + """ + The function that is used to aggregate + this type of record + + Returns + ------- + callable() + [Records] -> Record + """ + + return lambda records: max(records, key=lambda r: r.value()) + + @staticmethod + def value_function() -> float: + """ + Returns the average value from a list + + Returns + ------- + Average value of the list + """ + return mean # type: ignore + + @staticmethod + @abstractmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag : boolean + An optional tag that may be displayed as part of the header + indicating that this record has been aggregated using max, min or + average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + @property + @abstractmethod + def tag(self) -> str: + """ + Returns + ------- + str + the name tag of the record type. + """ + + def write_to_checkpoint(self): + return (self.tag, self.__dict__) + + @classmethod + def read_from_checkpoint(cls, record_dict) -> "Record": + record = cls(0, 0) + for key in ["_value", "_timestamp"]: + if key in record_dict: + setattr(record, key, record_dict[key]) + return record + + def value(self) -> Union[float, int]: + """ + This method returns the value of recorded metric + + Returns + ------- + float + value of the metric + """ + + return self._value + + def timestamp(self) -> int: + """ + This method should return the time at which the record was created. + + Returns + ------- + float + timestamp passed in during + record creation + """ + + return self._timestamp + + def __mul__(self, other) -> "Record": + """ + Defines left multiplication for records with floats or ints. + + Returns + ------- + Record + """ + + if isinstance(other, (int, float)): + return self.__class__(value=(self.value() * other), timestamp=0) + else: + raise TypeError + + def __rmul__(self, other) -> "Record": + """ + Defines right multiplication + """ + + return self.__mul__(other) + + def __truediv__(self, other) -> "Record": + """ + Defines left multiplication for records with floats or ints + + Returns + ------- + Record + """ + + if isinstance(other, (int, float)): + return self.__class__(value=(self.value() / other), timestamp=0) + + else: + raise TypeError + + @abstractmethod + def _positive_is_better(self) -> bool: + """ + Returns a bool indicating if a larger positive value is better + for a given record type + """ + + def calculate_percentage_gain(self, other: "Record") -> float: + """ + Calculates percentage gain between records + """ + + # When increasing values are better gain is based on the original value (other): + # example: 200 vs. 100 is (200 - 100) / 100 = 100% + # example: 100 vs. 200 is (100 - 200) / 200 = -50% + if self._positive_is_better(): + return ((self.value() - other.value()) / other.value()) * 100 + + # When decreasing values are better gain is based on the new value (self): + # example: 100 vs. 200 is (200 - 100) / 100 = 100% + # example: 200 vs. 
100 is (100 - 200) / 200 = -50% + else: + return ((other.value() - self.value()) / self.value()) * 100 + + +class IncreasingRecord(Record): + """ + Record where an increasing positive value is better + """ + + def _positive_is_better(self) -> bool: + return True + + +class DecreasingRecord(Record): + """ + Record where an increasing positive value is worse + """ + + def _positive_is_better(self) -> bool: + return False diff --git a/genai-perf/genai_perf/record/types/__init__.py b/genai-perf/genai_perf/record/types/__init__.py new file mode 100644 index 00000000..c6959fce --- /dev/null +++ b/genai-perf/genai_perf/record/types/__init__.py @@ -0,0 +1,13 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. diff --git a/genai-perf/genai_perf/record/types/cpu_available_ram.py b/genai-perf/genai_perf/record/types/cpu_available_ram.py new file mode 100644 index 00000000..b4e069ac --- /dev/null +++ b/genai-perf/genai_perf/record/types/cpu_available_ram.py @@ -0,0 +1,91 @@ +# Copyright 2020-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class CPUAvailableRAM(IncreasingRecord): + """ + The Available CPU memory + """ + + tag = "cpu_available_ram" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + CPU free memory + timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return ("Max " if aggregation_tag else "") + "RAM Available (MB)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. 
+ """ + + return CPUAvailableRAM(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return CPUAvailableRAM(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/cpu_used_ram.py b/genai-perf/genai_perf/record/types/cpu_used_ram.py new file mode 100644 index 00000000..7ee7f1d4 --- /dev/null +++ b/genai-perf/genai_perf/record/types/cpu_used_ram.py @@ -0,0 +1,91 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class CPUUsedRAM(DecreasingRecord): + """ + The CPU memory usage record + """ + + tag = "cpu_used_ram" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + CPU used memory + timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return ("Max " if aggregation_tag else "") + "RAM Usage (MB)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is better than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return CPUUsedRAM(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return CPUUsedRAM(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_free_memory.py b/genai-perf/genai_perf/record/types/gpu_free_memory.py new file mode 100644 index 00000000..21bf3f58 --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_free_memory.py @@ -0,0 +1,94 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUFreeMemory(IncreasingGPURecord): + """ + The free memory in the GPU. + """ + + tag = "gpu_free_memory" + + def __init__(self, value, device_uuid=None, timestamp=0): + """ + Parameters + ---------- + value : float + The value of the GPU metrtic + device_uuid : str + The GPU device uuid this metric is associated + with. + timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return GPUFreeMemory(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return GPUFreeMemory(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_power_usage.py b/genai-perf/genai_perf/record/types/gpu_power_usage.py new file mode 100644 index 00000000..5f1fc49d --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_power_usage.py @@ -0,0 +1,105 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.gpu_record import DecreasingGPURecord + + +@total_ordering +class GPUPowerUsage(DecreasingGPURecord): + """ + GPU Power Usage + """ + + tag = "gpu_power_usage" + + def __init__(self, value, device_uuid=None, timestamp=0): + """ + Parameters + ---------- + value : float + The value of the GPU metrtic + device_uuid : str + The GPU device uuid this metric is associated + with. 
+ timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def aggregation_function(): + """ + The function that is used to aggregate + this type of record + """ + + def average(seq): + return sum(seq[1:], start=seq[0]) / len(seq) + + return average + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed as part of the header + indicating that this record has been aggregated using max, min or + average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return ("Average " if aggregation_tag else "") + "GPU Power Usage (W)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return other.value() < self.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return GPUPowerUsage(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return GPUPowerUsage(device_uuid=None, value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_total_memory.py b/genai-perf/genai_perf/record/types/gpu_total_memory.py new file mode 100644 index 00000000..45fdc300 --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_total_memory.py @@ -0,0 +1,94 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUTotalMemory(IncreasingGPURecord): + """ + The total memory in the GPU. + """ + + tag = "gpu_total_memory" + + def __init__(self, value, device_uuid=None, timestamp=0): + """ + Parameters + ---------- + value : float + The value of the GPU metrtic + device_uuid : str + The GPU device uuid this metric is associated + with. + timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. 
+ """ + + return ("Max " if aggregation_tag else "") + "GPU Memory Available (MB)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return GPUTotalMemory(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return GPUTotalMemory(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_used_memory.py b/genai-perf/genai_perf/record/types/gpu_used_memory.py new file mode 100644 index 00000000..c4b1cc66 --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_used_memory.py @@ -0,0 +1,94 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.gpu_record import DecreasingGPURecord + + +@total_ordering +class GPUUsedMemory(DecreasingGPURecord): + """ + The used memory in the GPU. + """ + + tag = "gpu_used_memory" + + def __init__(self, value, device_uuid=None, timestamp=0): + """ + Parameters + ---------- + value : float + The value of the GPU metrtic + device_uuid : str + The GPU device uuid this metric is associated + with. + timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return ("Max " if aggregation_tag else "") + "GPU Memory Usage (MB)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return GPUUsedMemory(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. 
+ """ + + return GPUUsedMemory(device_uuid=None, value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/gpu_utilization.py b/genai-perf/genai_perf/record/types/gpu_utilization.py new file mode 100644 index 00000000..67a71f64 --- /dev/null +++ b/genai-perf/genai_perf/record/types/gpu_utilization.py @@ -0,0 +1,106 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.gpu_record import IncreasingGPURecord + + +@total_ordering +class GPUUtilization(IncreasingGPURecord): + """ + GPU utilization record + """ + + tag = "gpu_utilization" + + def __init__(self, value, device_uuid=None, timestamp=0): + """ + Parameters + ---------- + value : float + The value of the GPU metrtic + device_uuid : str + The GPU device uuid this metric is associated + with. + timestamp : int + The timestamp for the record in nanoseconds + """ + + super().__init__(value, device_uuid, timestamp) + + @staticmethod + def aggregation_function(): + """ + The function that is used to aggregate + this type of record + """ + + def average(seq): + return sum(seq[1:], start=seq[0]) / len(seq) + + return average + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return ("Average " if aggregation_tag else "") + "GPU Utilization (%)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return GPUUtilization(device_uuid=None, value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return GPUUtilization(device_uuid=None, value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_avg.py b/genai-perf/genai_perf/record/types/inter_token_latency_avg.py new file mode 100644 index 00000000..d6a6f947 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_avg.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyAvg(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_avg" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Avg Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_base.py b/genai-perf/genai_perf/record/types/inter_token_latency_base.py new file mode 100644 index 00000000..f267969c --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_base.py @@ -0,0 +1,72 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class InterTokenLatencyBase(DecreasingRecord): + """ + A record for perf_analyzer Inter token latency metric + """ + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. 
+ + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_max.py b/genai-perf/genai_perf/record/types/inter_token_latency_max.py new file mode 100644 index 00000000..d50d8cd1 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_max.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyMax(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_max" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Max Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_min.py b/genai-perf/genai_perf/record/types/inter_token_latency_min.py new file mode 100644 index 00000000..4a848480 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_min.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
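A minimal sketch (illustrative only, not part of the patch) of the inverted comparison and reverse subtraction described in InterTokenLatencyBase, assuming the genai_perf package from this patch is importable:

from genai_perf.record.types.inter_token_latency_avg import InterTokenLatencyAvg

fast = InterTokenLatencyAvg(10.0)  # ms
slow = InterTokenLatencyAvg(25.0)  # ms

assert fast > slow                    # lower latency ranks as better
assert (fast - slow).value() == 15.0  # reverse subtraction: 25.0 - 10.0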
+ +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyMin(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_min" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Min Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p25.py b/genai-perf/genai_perf/record/types/inter_token_latency_p25.py new file mode 100644 index 00000000..261caae6 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p25.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyP25(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p25" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p25 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p50.py b/genai-perf/genai_perf/record/types/inter_token_latency_p50.py new file mode 100644 index 00000000..4f277996 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p50.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyP50(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p50" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p50 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p75.py b/genai-perf/genai_perf/record/types/inter_token_latency_p75.py new file mode 100644 index 00000000..f95a938a --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p75.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyP75(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p75" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p75 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p90.py b/genai-perf/genai_perf/record/types/inter_token_latency_p90.py new file mode 100644 index 00000000..14b9e489 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p90.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyP90(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p90" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p90 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p95.py b/genai-perf/genai_perf/record/types/inter_token_latency_p95.py new file mode 100644 index 00000000..685d25b6 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p95.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyP95(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p95" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. 
+ """ + + return "p95 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/inter_token_latency_p99.py b/genai-perf/genai_perf/record/types/inter_token_latency_p99.py new file mode 100644 index 00000000..73354707 --- /dev/null +++ b/genai-perf/genai_perf/record/types/inter_token_latency_p99.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.inter_token_latency_base import InterTokenLatencyBase + + +@total_ordering +class InterTokenLatencyP99(InterTokenLatencyBase): + """ + A record for perf_analyzer Inter token latency metric + """ + + tag = "inter_token_latency_p99" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p99 Inter Token Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/output_token_throughput.py b/genai-perf/genai_perf/record/types/output_token_throughput.py new file mode 100644 index 00000000..d5635491 --- /dev/null +++ b/genai-perf/genai_perf/record/types/output_token_throughput.py @@ -0,0 +1,103 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class OutputTokenThroughput(IncreasingRecord): + """ + A record for perf_analyzer + metric 'Output Token Throughput' + """ + + tag = "output_token_throughput" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + The throughput from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + """ + Returns the total value from a list + + Returns + ------- + Total value of the list + """ + return sum + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Output Token Throughput (infer/sec)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/perf_client_response_wait.py b/genai-perf/genai_perf/record/types/perf_client_response_wait.py new file mode 100644 index 00000000..0204634f --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_client_response_wait.py @@ -0,0 +1,95 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfClientResponseWait(DecreasingRecord): + """ + A record for perf_analyzer + metric 'Client response wait' + """ + + tag = "perf_client_response_wait" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. 
+ """ + + return "Response Wait Time (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfClientResponseWait(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return PerfClientResponseWait(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_client_send_recv.py b/genai-perf/genai_perf/record/types/perf_client_send_recv.py new file mode 100644 index 00000000..e6eca6a2 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_client_send_recv.py @@ -0,0 +1,95 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfClientSendRecv(DecreasingRecord): + """ + A record for perf_analyzer + metric 'Client send/recv' + """ + + tag = "perf_client_send_recv" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Client Send/Recv (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfClientSendRecv(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. 
+ + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return PerfClientSendRecv(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_latency_avg.py b/genai-perf/genai_perf/record/types/perf_latency_avg.py new file mode 100644 index 00000000..7b40ce14 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_latency_avg.py @@ -0,0 +1,58 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.perf_latency_base import PerfLatencyBase + + +@total_ordering +class PerfLatencyAvg(PerfLatencyBase): + """ + A record for perf_analyzer latency metric + """ + + tag = "perf_latency_avg" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Avg Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_base.py b/genai-perf/genai_perf/record/types/perf_latency_base.py new file mode 100644 index 00000000..b7d6f5f5 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_latency_base.py @@ -0,0 +1,72 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
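The reverse subtraction called out in the docstrings is easiest to see with concrete numbers: for a decreasing (lower-is-better) record, `a - b` yields `b.value() - a.value()`, so a positive delta always means `a` is the improvement. A small sketch of that behavior, using two of the record types added in this change:

```python
from genai_perf.record.types.perf_latency_avg import PerfLatencyAvg
from genai_perf.record.types.perf_throughput import PerfThroughput

baseline = PerfLatencyAvg(30.0)
candidate = PerfLatencyAvg(20.0)

# Decreasing records subtract in reverse: 30 - 20 = +10 ms of improvement.
assert (candidate - baseline).value() == 10.0

# Increasing records subtract normally: higher throughput is a positive gain.
assert (PerfThroughput(1500.0) - PerfThroughput(1000.0)).value() == 500.0
```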
+ +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfLatencyBase(DecreasingRecord): + """ + A base class for perf_analyzer latency metric + """ + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_latency_p90.py b/genai-perf/genai_perf/record/types/perf_latency_p90.py new file mode 100644 index 00000000..e05b7632 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_latency_p90.py @@ -0,0 +1,58 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.perf_latency_base import PerfLatencyBase + + +@total_ordering +class PerfLatencyP90(PerfLatencyBase): + """ + A record for perf_analyzer latency metric + """ + + tag = "perf_latency_p90" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p90 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p95.py b/genai-perf/genai_perf/record/types/perf_latency_p95.py new file mode 100644 index 00000000..53877cad --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_latency_p95.py @@ -0,0 +1,58 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.perf_latency_base import PerfLatencyBase + + +@total_ordering +class PerfLatencyP95(PerfLatencyBase): + """ + A record for perf_analyzer latency metric + """ + + tag = "perf_latency_p95" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p95 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_latency_p99.py b/genai-perf/genai_perf/record/types/perf_latency_p99.py new file mode 100644 index 00000000..ba568a90 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_latency_p99.py @@ -0,0 +1,58 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.perf_latency_base import PerfLatencyBase + + +@total_ordering +class PerfLatencyP99(PerfLatencyBase): + """ + A record for perf_analyzer latency metric + """ + + tag = "perf_latency_p99" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p99 Latency (ms)" diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_infer.py b/genai-perf/genai_perf/record/types/perf_server_compute_infer.py new file mode 100644 index 00000000..5e65dd9e --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_server_compute_infer.py @@ -0,0 +1,95 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfServerComputeInfer(DecreasingRecord): + """ + A record for Server compute infer time + from the perf analyzer + """ + + tag = "perf_server_compute_infer" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Server Compute Infer time (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfServerComputeInfer(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return PerfServerComputeInfer(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_input.py b/genai-perf/genai_perf/record/types/perf_server_compute_input.py new file mode 100644 index 00000000..313f7a44 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_server_compute_input.py @@ -0,0 +1,95 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
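Each record class only implements `__eq__` and `__lt__`; the `@total_ordering` decorator derives `<=`, `>`, and `>=` from those two, which keeps the inverted "worse than" semantics in one place. A self-contained toy class showing the same pattern (the `ToyLatency` name is purely illustrative):

```python
from functools import total_ordering


@total_ordering
class ToyLatency:
    """Hypothetical lower-is-better metric, mirroring the records above."""

    def __init__(self, value):
        self._value = value

    def value(self):
        return self._value

    def __eq__(self, other):
        return self.value() == other.value()

    def __lt__(self, other):
        # Inverted on purpose: a larger latency is "worse than" a smaller one.
        return self.value() > other.value()


assert ToyLatency(5) > ToyLatency(8)   # __gt__ is derived by total_ordering
assert ToyLatency(8) <= ToyLatency(8)  # so is __le__
```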
+ +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfServerComputeInput(DecreasingRecord): + """ + A record for Server compute input time + from the perf analyzer + """ + + tag = "perf_server_compute_input" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Server Compute Input time (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfServerComputeInput(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return PerfServerComputeInput(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_compute_output.py b/genai-perf/genai_perf/record/types/perf_server_compute_output.py new file mode 100644 index 00000000..914e3cf5 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_server_compute_output.py @@ -0,0 +1,95 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfServerComputeOutput(DecreasingRecord): + """ + A record for Server compute output time + from the perf analyzer + """ + + tag = "perf_server_compute_output" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. 
+ """ + + return "Server Compute Output time (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfServerComputeOutput(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return PerfServerComputeOutput(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_server_queue.py b/genai-perf/genai_perf/record/types/perf_server_queue.py new file mode 100644 index 00000000..3f1c7144 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_server_queue.py @@ -0,0 +1,95 @@ +# Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class PerfServerQueue(DecreasingRecord): + """ + A record for Server queue time + from the perf analyzer + """ + + tag = "perf_server_queue" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Server Queue time (ms)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfServerQueue(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. 
+ + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return PerfServerQueue(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/perf_throughput.py b/genai-perf/genai_perf/record/types/perf_throughput.py new file mode 100644 index 00000000..4d52d342 --- /dev/null +++ b/genai-perf/genai_perf/record/types/perf_throughput.py @@ -0,0 +1,103 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import IncreasingRecord + + +@total_ordering +class PerfThroughput(IncreasingRecord): + """ + A record for perf_analyzer + metric 'Throughput' + """ + + tag = "perf_throughput" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + The throughput from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @staticmethod + def value_function(): + """ + Returns the total value from a list + + Returns + ------- + Total value of the list + """ + return sum + + @staticmethod + def header(aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Throughput (infer/sec)" + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() < other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return PerfThroughput(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subtracting two records together + to produce a brand new record. + """ + + return PerfThroughput(value=(self.value() - other.value())) diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_avg.py b/genai-perf/genai_perf/record/types/time_to_first_token_avg.py new file mode 100644 index 00000000..3a9b861b --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_avg.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenAvg(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_avg" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Avg Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_base.py b/genai-perf/genai_perf/record/types/time_to_first_token_base.py new file mode 100644 index 00000000..f7216f3f --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_base.py @@ -0,0 +1,72 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.record import DecreasingRecord + + +@total_ordering +class TimeToFirstTokenBase(DecreasingRecord): + """ + A base class record for perf_analyzer time to first token metric + """ + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + def __eq__(self, other): + """ + Allows checking for + equality between two records + """ + + return self.value() == other.value() + + def __lt__(self, other): + """ + Allows checking if + this record is less than + the other + """ + + return self.value() > other.value() + + def __add__(self, other): + """ + Allows adding two records together + to produce a brand new record. + """ + + return self.__class__(value=(self.value() + other.value())) + + def __sub__(self, other): + """ + Allows subbing two records together + to produce a brand new record. + + ** Note this does reverse subtraction because + of the inverted nature of latency (lower is better) + """ + + return self.__class__(value=(other.value() - self.value())) diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_max.py b/genai-perf/genai_perf/record/types/time_to_first_token_max.py new file mode 100644 index 00000000..eaba2b75 --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_max.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
+# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenMax(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_max" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "Max Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_min.py b/genai-perf/genai_perf/record/types/time_to_first_token_min.py new file mode 100644 index 00000000..15612bee --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_min.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenMin(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_min" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. 
+ """ + + return "Min Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p25.py b/genai-perf/genai_perf/record/types/time_to_first_token_p25.py new file mode 100644 index 00000000..cd472f67 --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p25.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenP25(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p25" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p25 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p50.py b/genai-perf/genai_perf/record/types/time_to_first_token_p50.py new file mode 100644 index 00000000..68b8a5a8 --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p50.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenP50(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p50" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p50 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p75.py b/genai-perf/genai_perf/record/types/time_to_first_token_p75.py new file mode 100644 index 00000000..6c22469b --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p75.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenP75(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p75" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p75 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p90.py b/genai-perf/genai_perf/record/types/time_to_first_token_p90.py new file mode 100644 index 00000000..431ce6eb --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p90.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenP90(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p90" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p90 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p95.py b/genai-perf/genai_perf/record/types/time_to_first_token_p95.py new file mode 100644 index 00000000..5b118301 --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p95.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenP95(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p95" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p95 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/record/types/time_to_first_token_p99.py b/genai-perf/genai_perf/record/types/time_to_first_token_p99.py new file mode 100644 index 00000000..68125f2e --- /dev/null +++ b/genai-perf/genai_perf/record/types/time_to_first_token_p99.py @@ -0,0 +1,58 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. 
+# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from functools import total_ordering + +from genai_perf.record.types.time_to_first_token_base import TimeToFirstTokenBase + + +@total_ordering +class TimeToFirstTokenP99(TimeToFirstTokenBase): + """ + A record for perf_analyzer Time to first token metric + """ + + tag = "time_to_first_token_p99" + + def __init__(self, value, timestamp=0): + """ + Parameters + ---------- + value : float + the latency extracted from the perf analyzer output + timestamp : float + Elapsed time from start of program + """ + + super().__init__(value, timestamp) + + @classmethod + def header(cls, aggregation_tag=False): + """ + Parameters + ---------- + aggregation_tag: bool + An optional tag that may be displayed + as part of the header indicating that + this record has been aggregated using + max, min or average etc. + + Returns + ------- + str + The full name of the + metric. + """ + + return "p99 Time To First Token (ms)" diff --git a/genai-perf/genai_perf/utils.py b/genai-perf/genai_perf/utils.py index 3d0612a7..56861a2f 100644 --- a/genai-perf/genai_perf/utils.py +++ b/genai-perf/genai_perf/utils.py @@ -127,3 +127,14 @@ def sample_bounded_normal(mean, stddev, lower=float("-inf"), upper=float("inf")) """ n = random.gauss(mean, stddev) return min(max(lower, n), upper) + + +# FIXME: OPTIMIZE +# This will move to the checkpoint class when it's created +def checkpoint_encoder(obj): + if isinstance(obj, bytes): + return obj.decode("utf-8") + elif hasattr(obj, "write_to_checkpoint"): + return obj.write_to_checkpoint() + else: + return obj.__dict__ diff --git a/genai-perf/tests/test_model_config_measurement.py b/genai-perf/tests/test_model_config_measurement.py new file mode 100644 index 00000000..b58fd8bb --- /dev/null +++ b/genai-perf/tests/test_model_config_measurement.py @@ -0,0 +1,207 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
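The `checkpoint_encoder` helper added to `utils.py` is intended to be handed to `json.dumps` via `default=`, which is exactly how the test below serializes a `ModelConfigMeasurement`: objects that expose `write_to_checkpoint()` serialize themselves, `bytes` are decoded, and anything else falls back to `__dict__`. A minimal sketch of that usage on a single record:

```python
import json

from genai_perf.record.types.perf_throughput import PerfThroughput
from genai_perf.utils import checkpoint_encoder

record = PerfThroughput(1000.0)

# json.dumps calls checkpoint_encoder for any object it cannot serialize
# natively; depending on whether the Record class implements
# write_to_checkpoint(), this takes the first branch or the __dict__ fallback.
serialized = json.dumps(record, default=checkpoint_encoder)
print(serialized)
```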
+ +import json +import unittest +from unittest.mock import patch + +from genai_perf.measurements.model_config_measurement import ( + ModelConfigMeasurement, + ModelConfigMeasurementDefaults, +) +from genai_perf.record.types.perf_latency_p99 import PerfLatencyP99 +from genai_perf.record.types.perf_throughput import PerfThroughput +from genai_perf.record.types.time_to_first_token_avg import TimeToFirstTokenAvg +from genai_perf.utils import checkpoint_encoder + + +class TestModelConfigMeasurement(unittest.TestCase): + ########################################################################### + # Setup & Teardown + ########################################################################### + def setUp(self): + + self.throughput_recordA = PerfThroughput(1000) + self.latency_recordA = PerfLatencyP99(20) + + self.perf_metricsA = { + PerfThroughput.tag: self.throughput_recordA, + PerfLatencyP99.tag: self.latency_recordA, + } + + self.mcmA = ModelConfigMeasurement(self.perf_metricsA) + + self.throughput_recordB = PerfThroughput(500) + self.latency_recordB = PerfLatencyP99(10) + + self.perf_metricsB = { + PerfThroughput.tag: self.throughput_recordB, + PerfLatencyP99.tag: self.latency_recordB, + } + + self.mcmB = ModelConfigMeasurement(self.perf_metricsB) + + def tearDown(self): + patch.stopall() + + ########################################################################### + # Accessor Tests + ########################################################################### + def test_basic_accessor_methods(self): + """ + Test that values are properly initialized + """ + self.assertEqual(self.mcmA.get_perf_metrics(), self.perf_metricsA) + self.assertEqual( + self.mcmA.get_perf_metric(PerfLatencyP99.tag), self.latency_recordA + ) + self.assertEqual( + self.mcmA.get_perf_metric_value(PerfThroughput.tag, return_value=-1), + self.throughput_recordA.value(), + ) + self.assertEqual( + self.mcmA.get_perf_metric_value(TimeToFirstTokenAvg.tag, return_value=-1), + -1, + ) + + def test_set_metric_weighting(self): + """ + Test that metric weighting is set correctly + """ + # Default + self.assertEqual( + ModelConfigMeasurementDefaults.METRIC_WEIGHTING, self.mcmA._metric_weights + ) + + self.mcmA.set_metric_weighting({PerfThroughput.tag: 2, PerfLatencyP99.tag: 3}) + expected_mw = {PerfThroughput.tag: 2 / 5, PerfLatencyP99.tag: 3 / 5} + self.assertEqual(expected_mw, self.mcmA._metric_weights) + + def test_get_weighted_score(self): + """ + Test that weighted score is returned correctly + """ + + # In the default case we are comparing throughputs with mcmA = 1000, mcmB = 500 + # scoreA will be positive (2/3), and scoreB be will be its negative + scoreA = self.mcmA.get_weighted_score(self.mcmB) + scoreB = self.mcmB.get_weighted_score(self.mcmA) + + self.assertEqual(2 / 3, scoreA) + self.assertEqual(-2 / 3, scoreB) + + # In this case we will change the objective to be latency, with mcmA = 20, mcmB = 5 + # since latency is a decreasing record (lower is better), scoreB will be positive + self.mcmA.set_metric_weighting({PerfLatencyP99.tag: 1}) + self.mcmB.set_metric_weighting({PerfLatencyP99.tag: 1}) + scoreA = self.mcmA.get_weighted_score(self.mcmB) + scoreB = self.mcmB.get_weighted_score(self.mcmA) + + self.assertEqual(-2 / 3, scoreA) + self.assertEqual(2 / 3, scoreB) + + ########################################################################### + # Checkpoint Tests + ########################################################################### + def test_checkpoint_methods(self): + """ + Checks to ensure checkpoint 
methods work as intended + """ + mcmA_json = json.dumps(self.mcmA, default=checkpoint_encoder) + + mcmA_from_checkpoint = ModelConfigMeasurement.read_from_checkpoint( + json.loads(mcmA_json) + ) + + self.assertEqual( + mcmA_from_checkpoint.get_perf_metrics(), self.mcmA.get_perf_metrics() + ) + + # Catchall in case something new is added + self.assertEqual(mcmA_from_checkpoint, self.mcmA) + + ########################################################################### + # Calculation Tests + ########################################################################### + def test_calculate_weighted_percentage_gain(self): + """ + Test that weighted percentages are returned correctly + """ + + # throughput: mcmA: 1000, mcmB: 500 + self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 100) + self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), -50) + + self.mcmA.set_metric_weighting({PerfLatencyP99.tag: 1}) + self.mcmB.set_metric_weighting({PerfLatencyP99.tag: 1}) + + # latency: mcmA: 20, mcmB: 10 + self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), -50) + self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), 100) + + # This illustrates why we need to use score, not percentages to determine + # which model is better. In both cases we will (correctly) report that + # mcmA/B is 25% better than the other, even though they are equal + # + # mcmA has 50% worse throughput, but 100% better latency + # mcmB has 100% better latency, but 50% worse throughput + self.mcmA.set_metric_weighting({PerfThroughput.tag: 1, PerfLatencyP99.tag: 1}) + self.mcmB.set_metric_weighting({PerfThroughput.tag: 1, PerfLatencyP99.tag: 1}) + self.assertEqual(self.mcmA, self.mcmB) + self.assertEqual(self.mcmA.calculate_weighted_percentage_gain(self.mcmB), 25) + self.assertEqual(self.mcmB.calculate_weighted_percentage_gain(self.mcmA), 25) + + ########################################################################### + # Comparison Tests + ########################################################################### + def test_is_better_than(self): + """ + Test that individual metric comparison works as expected + """ + self.mcmA.set_metric_weighting({PerfThroughput.tag: 1}) + + # throughput: 1000 is better than 500 + self.assertTrue(self.mcmA.is_better_than(self.mcmB)) + self.assertGreater(self.mcmA, self.mcmB) + + self.mcmA.set_metric_weighting({PerfLatencyP99.tag: 1}) + + # latency: 20 is worse than 10 + self.assertFalse(self.mcmA.is_better_than(self.mcmB)) + self.assertLess(self.mcmA, self.mcmB) + + def test_is_better_than_combo(self): + """ + Test that combination metric comparison works as expected + """ + # throuhput: 2000 vs. 1000 (better), latency: 20 vs. 
10 (worse) + # with latency bias mcmB is better + self.mcmA.set_metric_weighting({PerfThroughput.tag: 1, PerfLatencyP99.tag: 3}) + + self.assertFalse(self.mcmA.is_better_than(self.mcmB)) + + def test_is_better_than_empty(self): + """ + Test for correct return values when comparing for/against an empty set + """ + empty_mcm0 = ModelConfigMeasurement({}) + empty_mcm1 = ModelConfigMeasurement({}) + + self.assertTrue(self.mcmA.is_better_than(empty_mcm0)) + self.assertFalse(empty_mcm0.is_better_than(self.mcmA)) + self.assertEqual(empty_mcm0, empty_mcm1) + + +if __name__ == "__main__": + unittest.main() diff --git a/genai-perf/tests/test_record.py b/genai-perf/tests/test_record.py new file mode 100644 index 00000000..f928296b --- /dev/null +++ b/genai-perf/tests/test_record.py @@ -0,0 +1,225 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +import unittest +from unittest.mock import patch + +from genai_perf.record.record import RecordType + + +class TestRecord(unittest.TestCase): + """ + The record types in the genai_perf.record.types package are contextual + when it uses 'less than' (<) and 'greater than' (>) operators. + + The 'less than' and 'greater than' operators are overloaded to + mean 'worse than' and 'better than' respectively. + + Some record types treat MORE as better + (eg, gpu_free_memory, cpu_available_ram) + Other record types treat LESS as better + (eg, gpu_used_memory, cpu_used_ram) + + So, when comparing two objects of type 'cpu_used_ram' + 12 > 13 is actually true, since 12 'is better than' 13. 
+ """ + + def setUp(self): + ########################################################################### + # Setup & Teardown + ########################################################################### + record_types = RecordType.get_all_record_types() + self.all_record_types = record_types.values() + + self.less_is_better_types = { + record_types[t] + for t in [ + "perf_latency_avg", + "perf_latency_p90", + "perf_latency_p95", + "perf_latency_p99", + "inter_token_latency_min", + "inter_token_latency_max", + "inter_token_latency_avg", + "inter_token_latency_p25", + "inter_token_latency_p50", + "inter_token_latency_p75", + "inter_token_latency_p90", + "inter_token_latency_p95", + "inter_token_latency_p99", + "time_to_first_token_min", + "time_to_first_token_max", + "time_to_first_token_avg", + "time_to_first_token_p25", + "time_to_first_token_p50", + "time_to_first_token_p75", + "time_to_first_token_p90", + "time_to_first_token_p95", + "time_to_first_token_p99", + "gpu_used_memory", + "cpu_used_ram", + "perf_server_compute_infer", + "perf_server_queue", + "perf_client_response_wait", + "perf_server_compute_output", + "perf_client_send_recv", + "perf_server_compute_input", + "gpu_power_usage", + ] + } + + self.more_is_better_types = { + record_types[t] + for t in [ + "perf_throughput", + "output_token_throughput", + "gpu_free_memory", + "gpu_utilization", + "cpu_available_ram", + "gpu_total_memory", + ] + } + + def tearDown(self): + patch.stopall() + + ########################################################################### + # Completeness Tests + ########################################################################### + def test_counts(self): + """ + Make sure that all 'worse than' and 'better than' tests are tested + """ + total_count = len(self.all_record_types) + less_is_better_count = len(self.less_is_better_types) + more_is_better_count = len(self.more_is_better_types) + self.assertEqual(total_count, less_is_better_count + more_is_better_count) + + ########################################################################### + # Basic Operation Tests + ########################################################################### + def test_add(self): + """ + Test __add__ function for + each record type + """ + + for record_type in self.all_record_types: + metric1 = record_type(value=5) + metric2 = record_type(value=9) + metric3 = metric1 + metric2 + self.assertIsInstance(metric3, record_type) + self.assertEqual(metric3.value(), 14) + + def test_sub(self): + """ + Test __sub__ function for + each record type + """ + + for record_type in self.all_record_types: + metric1 = record_type(value=10) + metric2 = record_type(value=3) + metric3 = metric1 - metric2 + self.assertIsInstance(metric3, record_type) + if record_type in self.less_is_better_types: + self.assertEqual(metric3.value(), -7) + elif record_type in self.more_is_better_types: + self.assertEqual(metric3.value(), 7) + + def test_mult(self): + """ + Test __mult__ function for + each record type + """ + + for record_type in self.all_record_types: + metric1 = record_type(value=6) + metric2 = metric1 * 2 + self.assertIsInstance(metric2, record_type) + self.assertEqual(metric2.value(), 12) + + def test_div(self): + """ + Test __div__ function for + each record type + """ + + for record_type in self.all_record_types: + metric1 = record_type(value=60) + metric2 = metric1 / 12 + self.assertIsInstance(metric2, record_type) + self.assertEqual(metric2.value(), 5) + + def test_compare(self): + """ + Test __lt__, __eq__, __gt__ + functions 
for each record type + """ + + for record_type in self.all_record_types: + metric1 = record_type(value=10.6) + metric2 = record_type(value=3.2) + + # Test __lt__ (True if 1 worse than 2) + if record_type in self.less_is_better_types: + self.assertLess(metric1, metric2) + elif record_type in self.more_is_better_types: + self.assertLess(metric2, metric1) + + # Test __gt__ (True if 1 better than 2) + if record_type in self.less_is_better_types: + self.assertLess(metric1, metric2) + elif record_type in self.more_is_better_types: + self.assertGreater(metric1, metric2) + + # Test __eq__ + metric1 = record_type(value=12) + metric2 = record_type(value=12) + self.assertEqual(metric1, metric2) + + ########################################################################### + # Method Tests + ########################################################################### + def test_value(self): + """ + Test the value method + """ + avg_value = RecordType.get_all_record_types()[ + "perf_latency_p99" + ].value_function()([10, 50, 100, 40]) + + total_value = RecordType.get_all_record_types()[ + "perf_throughput" + ].value_function()([10, 50, 100, 40]) + + self.assertEqual(avg_value, 50) + self.assertEqual(total_value, 200) + + def test_calculate_percentage_gain(self): + """ + Test that percentage gain is calculated correctly + """ + for record_type in self.all_record_types: + metric1 = record_type(value=10) + metric2 = record_type(value=5) + + if record_type in self.less_is_better_types: + self.assertEqual(metric1.calculate_percentage_gain(metric2), -50) + else: + self.assertEqual(metric1.calculate_percentage_gain(metric2), 100) + + +if __name__ == "__main__": + unittest.main()
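For reference, one formulation of the percentage-gain arithmetic that is consistent with the values the test above expects (the real implementation lives on the `Record` classes; this sketch only restates the expected numbers): a value of 10 against a baseline of 5 is a +100% gain for a higher-is-better metric, while the same values read as a 50% regression for a lower-is-better metric.

```python
def percentage_gain(self_value, baseline_value, more_is_better):
    """Sketch of the expected semantics, not the library implementation."""
    if more_is_better:
        return (self_value - baseline_value) / baseline_value * 100
    # Lower-is-better metrics measure the gain in the opposite direction.
    return (baseline_value - self_value) / self_value * 100


assert percentage_gain(10, 5, more_is_better=True) == 100   # e.g. throughput
assert percentage_gain(10, 5, more_is_better=False) == -50  # e.g. latency
```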