Skip to content

Commit

Permalink
Add genai_perf CLI options to MA (#854)
Browse files Browse the repository at this point in the history
* Added support for genai_perf CLI

* Remove dead code

* Removing genai_perf collateral

* Fixing codeQL issue

* Adding streaming to genai_perf_config
  • Loading branch information
nv-braf committed Apr 8, 2024
1 parent db55ca4 commit 064f1e3
Show file tree
Hide file tree
Showing 8 changed files with 266 additions and 11 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -129,7 +129,7 @@ def _generate_subset(
self._send_results_to_generator(index)

def _make_run_config(self) -> RunConfig:
    """Assemble a RunConfig for the current pass.

    The genai_perf flags of the first profiled model are applied to the
    RunConfig as a whole, then one ModelRunConfig is attached per model.
    """
    config = RunConfig(self._triton_env, self._models[0].genai_perf_flags())
    for idx, _ in enumerate(self._models):
        config.add_model_run_config(self._curr_model_run_configs[idx])
    return config
Expand Down
19 changes: 19 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
Original file line number Diff line number Diff line change
Expand Up @@ -31,6 +31,7 @@
)
from model_analyzer.constants import LOGGER_NAME
from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException
from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig
from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig
from model_analyzer.record.record import RecordType
from model_analyzer.triton.server.server_config import TritonServerConfig
Expand Down Expand Up @@ -375,6 +376,10 @@ def _add_profile_models_configs(self):
}
)

genai_perf_flags_scheme = ConfigObject(
schema={k: ConfigPrimitive(str) for k in GenaiPerfConfig.allowed_keys()}
)

triton_server_environment_scheme = ConfigObject(
schema={"*": ConfigPrimitive(str)}
)
Expand Down Expand Up @@ -455,6 +460,13 @@ def _add_profile_models_configs(self):
description="Allows custom configuration of the perf analyzer instances used by model analyzer.",
)
)
self._add_config(
ConfigField(
"genai_perf_flags",
field_type=genai_perf_flags_scheme,
description="Allows custom configuration of the GenAI Perf instances used by model analyzer.",
)
)
self._add_config(
ConfigField(
"triton_server_flags",
Expand Down Expand Up @@ -666,6 +678,7 @@ def _add_profile_models_configs(self):
"weighting": ConfigPrimitive(type_=int),
"model_config_parameters": model_config_fields,
"perf_analyzer_flags": perf_analyzer_flags_scheme,
"genai_perf_flags": genai_perf_flags_scheme,
"triton_server_flags": triton_server_flags_scheme,
"triton_server_environment": triton_server_environment_scheme,
"triton_docker_args": triton_docker_args_scheme,
Expand Down Expand Up @@ -1559,6 +1572,12 @@ def _autofill_values(self):
else:
new_model["perf_analyzer_flags"] = model.perf_analyzer_flags()

# GenAI Perf flags
if not model.genai_perf_flags():
new_model["genai_perf_flags"] = self.genai_perf_flags
else:
new_model["genai_perf_flags"] = model.genai_perf_flags()

# triton server flags
if not model.triton_server_flags():
new_model["triton_server_flags"] = self.triton_server_flags
Expand Down
18 changes: 18 additions & 0 deletions model_analyzer/config/input/objects/config_model_profile_spec.py
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,7 @@ def __init__(
parameters=None,
model_config_parameters=None,
perf_analyzer_flags=None,
genai_perf_flags=None,
triton_server_flags=None,
triton_server_environment=None,
triton_docker_args=None,
Expand All @@ -58,6 +59,9 @@ def __init__(
perf_analyzer_flags : dict
The custom perf analyzer configuration
for this model
genai_perf_flags : dict
The custom GenAI perf configuration
for this model
triton_server_flags : dict
The configuration for the triton server instance launched
for this model
Expand All @@ -78,6 +82,7 @@ def __init__(
self._parameters = parameters
self._model_config_parameters = model_config_parameters
self._perf_analyzer_flags = perf_analyzer_flags
self._genai_perf_flags = genai_perf_flags
self._triton_server_flags = triton_server_flags
self._triton_server_environment = triton_server_environment
self._triton_docker_args = triton_docker_args
Expand Down Expand Up @@ -162,6 +167,16 @@ def perf_analyzer_flags(self):

return self._perf_analyzer_flags

def genai_perf_flags(self):
    """Accessor for the custom GenAI-Perf configuration of this model.

    Returns
    -------
    dict
        the genai_perf_flags set on this model spec
    """
    return self._genai_perf_flags

def triton_server_flags(self):
"""
Returns
Expand Down Expand Up @@ -304,4 +319,7 @@ def __repr__(self):
if self._perf_analyzer_flags:
model_object["perf_analyzer_flags"] = self._perf_analyzer_flags

if self._genai_perf_flags:
model_object["genai_perf_flags"] = self._genai_perf_flags

return str(model_object)
11 changes: 10 additions & 1 deletion model_analyzer/config/run/run_config.py
Original file line number Diff line number Diff line change
Expand Up @@ -17,6 +17,7 @@
from typing import List

from model_analyzer.config.run.model_run_config import ModelRunConfig
from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig


class RunConfig:
Expand All @@ -25,16 +26,21 @@ class RunConfig:
at the same time in Perf Analyzer
"""

def __init__(self, triton_env):
def __init__(self, triton_env, genai_perf_flags=None):
    """
    Parameters
    ----------
    triton_env : dict
        A dictionary of environment variables to set
        when launching tritonserver
    genai_perf_flags : dict
        The set of flags used when calling genai_perf for LLM models
    """

    self._triton_env = triton_env
    # Build the genai_perf config up front and overlay any user-supplied flags
    genai_config = GenaiPerfConfig()
    genai_config.update_config(genai_perf_flags)
    self._genai_perf_config = genai_config
    self._model_run_configs: List[ModelRunConfig] = []

def add_model_run_config(self, model_run_config):
Expand Down Expand Up @@ -103,6 +109,9 @@ def triton_environment(self):

return self._triton_env

def genai_perf_config(self):
    """Return the GenaiPerfConfig attached to this run config."""
    return self._genai_perf_config

def models_name(self):
    """Returns a single comma-joined name of the original model names"""
    names = [cfg.model_name() for cfg in self.model_run_configs()]
    return ",".join(names)
Expand Down
3 changes: 2 additions & 1 deletion model_analyzer/constants.py
Original file line number Diff line number Diff line change
Expand Up @@ -71,5 +71,6 @@
# Model analyzer package name
PACKAGE_NAME = "triton-model-analyzer"

# GENAI-PERF CSV
# GENAI-PERF
GENAI_PERF_CSV = "profile_export_genai_perf.csv"
GENAI_PERF_COLLATERAL = ["llm_inputs.json", "profile_export.json"]
206 changes: 206 additions & 0 deletions model_analyzer/perf_analyzer/genai_perf_config.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,206 @@
#!/usr/bin/env python3

# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Licensed under the Apache License, Version 2.0 (the "License");
# you may not use this file except in compliance with the License.
# You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing, software
# distributed under the License is distributed on an "AS IS" BASIS,
# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
# See the License for the specific language governing permissions and
# limitations under the License.

from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException


class GenaiPerfConfig:
    """
    A config class to set arguments to the genai_perf.
    An argument set to None will use the genai_perf's default.
    """

    # CLI options genai_perf understands; anything else is rejected
    genai_perf_args = [
        "backend",
        "endpoint",
        "service-kind",
        "url",
        "expected-output-tokens",
        "input-dataset",
        "input-tokens-mean",
        "input-tokens-stddev",
        "input-type",
        "num-of-output-prompts",
        "random-seed",
        "streaming",
        "tokenizer",
    ]

    # Flags emitted as bare "--flag" (no "=value") when set to "true"
    boolean_args = ["streaming"]

    def __init__(self):
        """
        Construct a GenaiPerfConfig
        """

        # Every supported arg starts unset; None means "use genai_perf's default"
        self._args = {k: None for k in self.genai_perf_args}

    @classmethod
    def allowed_keys(cls):
        """
        Returns
        -------
        list of str
            The keys that are allowed to be
            passed into genai_perf
        """

        return cls.genai_perf_args

    def update_config(self, params=None):
        """
        Allows setting values from a params dict

        Parameters
        ----------
        params: dict
            keys are allowed args to genai_perf
        """

        # isinstance (not type(...) is dict) so dict subclasses are accepted
        if params and isinstance(params, dict):
            for key, value in params.items():
                self[key] = value

    @classmethod
    def from_dict(cls, genai_perf_config_dict):
        """
        Reconstruct a GenaiPerfConfig from a checkpoint dict.

        Parameters
        ----------
        genai_perf_config_dict : dict
            a serialized GenaiPerfConfig (as produced by its __dict__)

        Returns
        -------
        GenaiPerfConfig
        """
        # cls() instead of a hard-coded class name keeps subclasses working
        genai_perf_config = cls()
        for key in [
            "_args",
        ]:
            if key in genai_perf_config_dict:
                setattr(genai_perf_config, key, genai_perf_config_dict[key])
        return genai_perf_config

    def representation(self):
        """
        Returns
        -------
        str
            a string representation of the Genai Perf config
            that removes values which can vary between
            runs, but should be ignored when determining
            if a previous (checkpointed) run can be used
        """
        cli_string = self.to_cli_string()

        return cli_string

    def to_cli_string(self) -> str:
        """
        Utility function to convert a config into a
        string of arguments to genai_perf with CLI.

        Returns
        -------
        str
            cli command string consisting of all arguments
            to genai_perf set in the config, without
            the executable name.
        """

        args = []
        args.extend(self._parse_options())

        return " ".join(args)

    def _parse_options(self):
        """
        Parse the genai perf args into "--key=value" / "--flag" tokens.
        Args still set to None are omitted.
        """
        temp_args = []
        for key, value in self._args.items():
            if key in self.boolean_args:
                temp_args = self._parse_boolean_args(key, value, temp_args)
            elif value:
                temp_args.append(f"--{key}={value}")
        return temp_args

    def _parse_boolean_args(self, key, value, temp_args):
        """
        Parse genai perf args that should not add a value to the cli string

        Raises
        ------
        TritonModelAnalyzerException
            If the value is neither None nor a string
            (raise instead of assert so the check survives `python -O`)
        """
        if value is not None and not isinstance(value, str):
            raise TritonModelAnalyzerException(
                f"Data type for arg {key} must be a (boolean) string instead of {type(value)}"
            )
        if value is not None and value.lower() == "true":
            temp_args.append(f"--{key}")
        return temp_args

    def __getitem__(self, key):
        """
        Gets an arguments value in config

        Parameters
        ----------
        key : str
            The name of the argument to the genai perf config

        Returns
        -------
        object
            The value that the argument is set to in this config

        Raises
        ------
        TritonModelAnalyzerException
            If argument not found in the config
        """

        if key in self._args:
            return self._args[key]
        else:
            raise TritonModelAnalyzerException(
                f"Key {key} does not exist in genai_perf_flags."
            )

    def __setitem__(self, key, value):
        """
        Sets an arguments value in config
        after checking if defined/supported.

        Parameters
        ----------
        key : str
            The name of the argument in genai_perf
        value : (any)
            The value to which the argument is being set

        Raises
        ------
        TritonModelAnalyzerException
            If key is unsupported or undefined in the
            config class
        """

        if key in self._args:
            self._args[key] = value
        else:
            raise TritonModelAnalyzerException(
                f"The argument '{key}' to the genai_perf "
                "is not supported by model analyzer."
            )

    def __contains__(self, key):
        """
        Returns
        -------
        True if key is in perf_config i.e. the key is a
        genai perf config argument
        """

        return key in GenaiPerfConfig.allowed_keys()
Loading

0 comments on commit 064f1e3

Please sign in to comment.