Adding new output fields for LLM
nv-braf committed Mar 22, 2024
1 parent 4202055 commit 39442ae
Showing 3 changed files with 46 additions and 0 deletions.
7 changes: 7 additions & 0 deletions model_analyzer/config/input/config_command_profile.py
@@ -50,6 +50,7 @@
     DEFAULT_GPU_OUTPUT_FIELDS,
     DEFAULT_GPUS,
     DEFAULT_INFERENCE_OUTPUT_FIELDS,
+    DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS,
     DEFAULT_MAX_RETRIES,
     DEFAULT_MODEL_TYPE,
     DEFAULT_MODEL_WEIGHTING,
@@ -1354,6 +1355,12 @@ def _autofill_values(self):
             if not self._fields["gpu_output_fields"].is_set_by_user():
                 self.gpu_output_fields = DEFAULT_REQUEST_RATE_GPU_OUTPUT_FIELDS
 
+        # Switch default output fields if user specifies model type of LLM
+        # and the user didn't specify a custom output field
+        if self.model_type == "LLM":
+            if not self._fields["inference_output_fields"].is_set_by_user():
+                self.inference_output_fields = DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS
+
         new_profile_models = {}
         for i, model in enumerate(self.profile_models):
             new_model = {"cpu_only": (model.cpu_only() or cpu_only)}
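The new branch in _autofill_values only takes effect when --model-type LLM is given and inference_output_fields has not been set by the user, so explicit user settings still win over the LLM defaults. A minimal sketch of that precedence, using simplified stand-in arguments rather than the real config objects:

    # Hedged illustration of the precedence applied above, not the actual
    # Model Analyzer code: user-specified fields always win, and the LLM
    # defaults are only a fallback when the model type is LLM.
    def resolve_inference_output_fields(model_type, user_fields, defaults, llm_defaults):
        if user_fields is not None:  # fields set explicitly by the user
            return user_fields
        if model_type == "LLM":  # fall back to the LLM-specific defaults
            return llm_defaults
        return defaults  # otherwise keep the standard defaults
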
14 changes: 14 additions & 0 deletions model_analyzer/config/input/config_defaults.py
@@ -119,6 +119,20 @@
     "perf_throughput",
     "perf_latency_p99",
 ]
+DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS = [
+    "model_name",
+    "batch_size",
+    "concurrency",
+    "model_config_path",
+    "instance_group",
+    "max_batch_size",
+    "satisfies_constraints",
+    "perf_throughput",
+    "perf_latency_p99",
+    "inter_token_latency_p99",
+    "time_to_first_token_p99",
+    "output_token_throughput",
+]
 DEFAULT_REQUEST_RATE_INFERENCE_OUTPUT_FIELDS = [
     "model_name",
     "batch_size",
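DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS mirrors the standard inference columns and appends three token-level metrics: inter_token_latency_p99, time_to_first_token_p99, and output_token_throughput. A quick way to inspect that delta, assuming both constants are importable from config_defaults as this diff suggests:

    from model_analyzer.config.input.config_defaults import (
        DEFAULT_INFERENCE_OUTPUT_FIELDS,
        DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS,
    )

    # Fields present only in the LLM defaults; based on this diff these should
    # be the three token-level metrics listed above.
    extra_fields = [
        field
        for field in DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS
        if field not in DEFAULT_INFERENCE_OUTPUT_FIELDS
    ]
    print(extra_fields)
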
25 changes: 25 additions & 0 deletions tests/test_config.py
@@ -24,6 +24,9 @@
 from model_analyzer.cli.cli import CLI
 from model_analyzer.config.input.config_command_profile import ConfigCommandProfile
 from model_analyzer.config.input.config_command_report import ConfigCommandReport
+from model_analyzer.config.input.config_defaults import (
+    DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS,
+)
 from model_analyzer.config.input.config_enum import ConfigEnum
 from model_analyzer.config.input.config_list_generic import ConfigListGeneric
 from model_analyzer.config.input.config_list_numeric import ConfigListNumeric
@@ -2356,6 +2359,28 @@ def test_mixing_request_rate_and_concurrency(self):
         with self.assertRaises(TritonModelAnalyzerException):
             self._evaluate_config(args, yaml_content, subcommand="profile")
 
+    def test_model_type_llm(self):
+        """
+        Test that model type of LLM chooses the correct inference outputs
+        """
+        args = [
+            "model-analyzer",
+            "profile",
+            "--model-repository",
+            "cli-repository",
+            "--profile-models",
+            "modelA",
+            "--model-type",
+            "LLM",
+        ]
+        yaml_content = ""
+
+        config = self._evaluate_config(args, yaml_content)
+
+        self.assertEqual(
+            config.inference_output_fields, DEFAULT_LLM_INFERENCE_OUTPUT_FIELDS
+        )
+
     def _test_request_rate_config_conflicts(
         self, base_args: List[Any], yaml_content: str
     ) -> None:
