From 064f1e3682a2d0645f62a027775576e499393235 Mon Sep 17 00:00:00 2001 From: Brian Raf <92820864+nv-braf@users.noreply.github.com> Date: Wed, 3 Apr 2024 09:26:40 -0700 Subject: [PATCH] Add genai_perf CLI options to MA (#854) * Added support for genai_perf CLI * Remove dead code * Removing genai_perf collateral * Fixing codeQL issue * Adding streaming to genai_perf_config --- .../generate/brute_run_config_generator.py | 2 +- .../config/input/config_command_profile.py | 19 ++ .../objects/config_model_profile_spec.py | 18 ++ model_analyzer/config/run/run_config.py | 11 +- model_analyzer/constants.py | 3 +- .../perf_analyzer/genai_perf_config.py | 206 ++++++++++++++++++ model_analyzer/perf_analyzer/perf_analyzer.py | 17 +- tests/test_cli.py | 1 + 8 files changed, 266 insertions(+), 11 deletions(-) create mode 100755 model_analyzer/perf_analyzer/genai_perf_config.py diff --git a/model_analyzer/config/generate/brute_run_config_generator.py b/model_analyzer/config/generate/brute_run_config_generator.py index d226811aa..61d1accd4 100755 --- a/model_analyzer/config/generate/brute_run_config_generator.py +++ b/model_analyzer/config/generate/brute_run_config_generator.py @@ -129,7 +129,7 @@ def _generate_subset( self._send_results_to_generator(index) def _make_run_config(self) -> RunConfig: - run_config = RunConfig(self._triton_env) + run_config = RunConfig(self._triton_env, self._models[0].genai_perf_flags()) for index in range(len(self._models)): run_config.add_model_run_config(self._curr_model_run_configs[index]) return run_config diff --git a/model_analyzer/config/input/config_command_profile.py b/model_analyzer/config/input/config_command_profile.py index d4650a040..fc13cdb08 100755 --- a/model_analyzer/config/input/config_command_profile.py +++ b/model_analyzer/config/input/config_command_profile.py @@ -31,6 +31,7 @@ ) from model_analyzer.constants import LOGGER_NAME from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException +from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig from model_analyzer.perf_analyzer.perf_config import PerfAnalyzerConfig from model_analyzer.record.record import RecordType from model_analyzer.triton.server.server_config import TritonServerConfig @@ -375,6 +376,10 @@ def _add_profile_models_configs(self): } ) + genai_perf_flags_scheme = ConfigObject( + schema={k: ConfigPrimitive(str) for k in GenaiPerfConfig.allowed_keys()} + ) + triton_server_environment_scheme = ConfigObject( schema={"*": ConfigPrimitive(str)} ) @@ -455,6 +460,13 @@ def _add_profile_models_configs(self): description="Allows custom configuration of the perf analyzer instances used by model analyzer.", ) ) + self._add_config( + ConfigField( + "genai_perf_flags", + field_type=genai_perf_flags_scheme, + description="Allows custom configuration of the GenAI Perf instances used by model analyzer.", + ) + ) self._add_config( ConfigField( "triton_server_flags", @@ -666,6 +678,7 @@ def _add_profile_models_configs(self): "weighting": ConfigPrimitive(type_=int), "model_config_parameters": model_config_fields, "perf_analyzer_flags": perf_analyzer_flags_scheme, + "genai_perf_flags": genai_perf_flags_scheme, "triton_server_flags": triton_server_flags_scheme, "triton_server_environment": triton_server_environment_scheme, "triton_docker_args": triton_docker_args_scheme, @@ -1559,6 +1572,12 @@ def _autofill_values(self): else: new_model["perf_analyzer_flags"] = model.perf_analyzer_flags() + # GenAI Perf flags + if not model.genai_perf_flags(): + new_model["genai_perf_flags"] = self.genai_perf_flags + else: + new_model["genai_perf_flags"] = model.genai_perf_flags() + # triton server flags if not model.triton_server_flags(): new_model["triton_server_flags"] = self.triton_server_flags diff --git a/model_analyzer/config/input/objects/config_model_profile_spec.py b/model_analyzer/config/input/objects/config_model_profile_spec.py index d45e68d41..250cc4980 100755 --- a/model_analyzer/config/input/objects/config_model_profile_spec.py +++ b/model_analyzer/config/input/objects/config_model_profile_spec.py @@ -33,6 +33,7 @@ def __init__( parameters=None, model_config_parameters=None, perf_analyzer_flags=None, + genai_perf_flags=None, triton_server_flags=None, triton_server_environment=None, triton_docker_args=None, @@ -58,6 +59,9 @@ def __init__( perf_analyzer_flags : dict The custom perf analyzer configuration for this model + genai_perf_flags : dict + The custom GenAI perf configuration + for this model triton_server_flags : dict The configuration for the triton server instance launched for this model @@ -78,6 +82,7 @@ def __init__( self._parameters = parameters self._model_config_parameters = model_config_parameters self._perf_analyzer_flags = perf_analyzer_flags + self._genai_perf_flags = genai_perf_flags self._triton_server_flags = triton_server_flags self._triton_server_environment = triton_server_environment self._triton_docker_args = triton_docker_args @@ -162,6 +167,16 @@ def perf_analyzer_flags(self): return self._perf_analyzer_flags + def genai_perf_flags(self): + """ + Returns + ------- + dict: + the genai_perf_flags + """ + + return self._genai_perf_flags + def triton_server_flags(self): """ Returns @@ -304,4 +319,7 @@ def __repr__(self): if self._perf_analyzer_flags: model_object["perf_analyzer_flags"] = self._perf_analyzer_flags + if self._genai_perf_flags: + model_object["genai_perf_flags"] = self._genai_perf_flags + return str(model_object) diff --git a/model_analyzer/config/run/run_config.py b/model_analyzer/config/run/run_config.py index 29efcaf08..9b53d8266 100755 --- a/model_analyzer/config/run/run_config.py +++ b/model_analyzer/config/run/run_config.py @@ -17,6 +17,7 @@ from typing import List from model_analyzer.config.run.model_run_config import ModelRunConfig +from model_analyzer.perf_analyzer.genai_perf_config import GenaiPerfConfig class RunConfig: @@ -25,16 +26,21 @@ class RunConfig: at the same time in Perf Analyzer """ - def __init__(self, triton_env): + def __init__(self, triton_env, genai_perf_flags=None): """ Parameters ---------- triton_env : dict A dictionary of environment variables to set when launching tritonserver + + genai_perf_flags: dict + The set of flags used when calling genai_perf for LLM models """ self._triton_env = triton_env + self._genai_perf_config = GenaiPerfConfig() + self._genai_perf_config.update_config(genai_perf_flags) self._model_run_configs: List[ModelRunConfig] = [] def add_model_run_config(self, model_run_config): @@ -103,6 +109,9 @@ def triton_environment(self): return self._triton_env + def genai_perf_config(self): + return self._genai_perf_config + def models_name(self): """Returns a single comma-joined name of the original model names""" return ",".join([mrc.model_name() for mrc in self.model_run_configs()]) diff --git a/model_analyzer/constants.py b/model_analyzer/constants.py index 4fd91a480..6d7682515 100755 --- a/model_analyzer/constants.py +++ b/model_analyzer/constants.py @@ -71,5 +71,6 @@ # Model analyzer package name PACKAGE_NAME = "triton-model-analyzer" -# GENAI-PERF CSV +# GENAI-PERF GENAI_PERF_CSV = "profile_export_genai_perf.csv" +GENAI_PERF_COLLATERAL = ["llm_inputs.json", "profile_export.json"] diff --git a/model_analyzer/perf_analyzer/genai_perf_config.py b/model_analyzer/perf_analyzer/genai_perf_config.py new file mode 100755 index 000000000..9e5a77201 --- /dev/null +++ b/model_analyzer/perf_analyzer/genai_perf_config.py @@ -0,0 +1,206 @@ +#!/usr/bin/env python3 + +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. + +from model_analyzer.model_analyzer_exceptions import TritonModelAnalyzerException + + +class GenaiPerfConfig: + """ + A config class to set arguments to the genai_perf. + An argument set to None will use the genai_perf's default. + """ + + genai_perf_args = [ + "backend", + "endpoint", + "service-kind", + "url", + "expected-output-tokens", + "input-dataset", + "input-tokens-mean", + "input-tokens-stddev", + "input-type", + "num-of-output-prompts", + "random-seed", + "streaming", + "tokenizer", + ] + + boolean_args = ["streaming"] + + def __init__(self): + """ + Construct a GenaiPerfConfig + """ + + self._args = {k: None for k in self.genai_perf_args} + + @classmethod + def allowed_keys(cls): + """ + Returns + ------- + list of str + The keys that are allowed to be + passed into perf_analyzer + """ + + return cls.genai_perf_args + + def update_config(self, params=None): + """ + Allows setting values from a params dict + + Parameters + ---------- + params: dict + keys are allowed args to perf_analyzer + """ + + if params and type(params) is dict: + for key in params: + self[key] = params[key] + + @classmethod + def from_dict(cls, genai_perf_config_dict): + genai_perf_config = GenaiPerfConfig() + for key in [ + "_args", + ]: + if key in genai_perf_config_dict: + setattr(genai_perf_config, key, genai_perf_config_dict[key]) + return genai_perf_config + + def representation(self): + """ + Returns + ------- + str + a string representation of the Genai Perf config + that removes values which can vary between + runs, but should be ignored when determining + if a previous (checkpointed) run can be used + """ + cli_string = self.to_cli_string() + + return cli_string + + def to_cli_string(self) -> str: + """ + Utility function to convert a config into a + string of arguments to the perf_analyzer with CLI. + + Returns + ------- + str + cli command string consisting of all arguments + to the perf_analyzer set in the config, without + the executable name. + """ + + # single dashed options, then verbose flags, then main args + args = [] + args.extend(self._parse_options()) + + return " ".join(args) + + def _parse_options(self): + """ + Parse the genai perf args + """ + temp_args = [] + for key, value in self._args.items(): + if key in self.boolean_args: + temp_args = self._parse_boolean_args(key, value, temp_args) + elif value: + temp_args.append(f"--{key}={value}") + return temp_args + + def _parse_boolean_args(self, key, value, temp_args): + """ + Parse genai perf args that should not add a value to the cli string + """ + assert type(value) in [ + str, + type(None), + ], f"Data type for arg {key} must be a (boolean) string instead of {type(value)}" + if value != None and value.lower() == "true": + temp_args.append(f"--{key}") + return temp_args + + def __getitem__(self, key): + """ + Gets an arguments value in config + + Parameters + ---------- + key : str + The name of the argument to the genai perf config + + Returns + ------- + object + The value that the argument is set to in this config + + Raises + ------ + KeyError + If argument not found in the config + """ + + if key in self._args: + return self._args[key] + else: + raise TritonModelAnalyzerException( + f"Key {key} does not exist in genai_perf_flags." + ) + + def __setitem__(self, key, value): + """ + Sets an arguments value in config + after checking if defined/supported. + + Parameters + ---------- + key : str + The name of the argument in genai_perf + value : (any) + The value to which the argument is being set + + Raises + ------ + TritonModelAnalyzerException + If key is unsupported or undefined in the + config class + """ + + if key in self._args: + self._args[key] = value + else: + raise TritonModelAnalyzerException( + f"The argument '{key}' to the genai_perf " + "is not supported by model analyzer." + ) + + def __contains__(self, key): + """ + Returns + ------- + True if key is in perf_config i.e. the key is a + genai perf config argument + """ + + return key in GenaiPerfConfig.allowed_keys() diff --git a/model_analyzer/perf_analyzer/perf_analyzer.py b/model_analyzer/perf_analyzer/perf_analyzer.py index d59e79279..b301ee97e 100755 --- a/model_analyzer/perf_analyzer/perf_analyzer.py +++ b/model_analyzer/perf_analyzer/perf_analyzer.py @@ -29,6 +29,7 @@ from model_analyzer.config.input.config_defaults import DEFAULT_MODEL_TYPE from model_analyzer.constants import ( + GENAI_PERF_COLLATERAL, GENAI_PERF_CSV, INTERVAL_SLEEP_TIME, LOGGER_NAME, @@ -323,15 +324,10 @@ def _get_cmd(self): return cmd def _get_single_model_cmd(self, index): - # TODO: TMA-1771 - hook up the user defined CLI options if self._model_type == "LLM": - cmd = [ - "genai-perf", - "-m", - self._config.models_name(), - "--streaming", - "--", - ] + cmd = ["genai-perf", "-m", self._config.models_name()] + cmd += self._get_genai_perf_cli_command(index).replace("=", " ").split() + cmd += ["--"] cmd += ( self._get_pa_cli_command(index, exclude_model_name=True) .replace("=", " ") @@ -352,6 +348,9 @@ def _get_pa_cli_command(self, index, exclude_model_name=False): .to_cli_string(exclude_model_name) ) + def _get_genai_perf_cli_command(self, index): + return self._config.genai_perf_config().to_cli_string() + def _create_env(self, env): perf_analyzer_env = os.environ.copy() @@ -582,6 +581,8 @@ def _parse_llm_outputs(self, metrics): ) os.remove(GENAI_PERF_CSV) + for filename in GENAI_PERF_COLLATERAL: + os.remove(filename) def _extract_perf_records_from_row( self, requested_metrics: List[Record], row_metrics: Dict[str, str] diff --git a/tests/test_cli.py b/tests/test_cli.py index 72e81c786..33a0dd4e0 100755 --- a/tests/test_cli.py +++ b/tests/test_cli.py @@ -165,6 +165,7 @@ def get_test_options(): OptionStruct("noop", "yaml_profile", "weighting"), OptionStruct("noop", "yaml_profile", "triton_server_flags"), OptionStruct("noop", "yaml_profile", "perf_analyzer_flags"), + OptionStruct("noop", "yaml_profile", "genai_perf_flags"), OptionStruct("noop", "yaml_profile", "triton_docker_labels"), OptionStruct("noop", "yaml_profile", "triton_server_environment"), OptionStruct("noop", "yaml_profile", "triton_docker_args"),