Skip to content

Commit

Permalink
Move GenAI-Perf profiling to its own subcommand (#745)
Browse files Browse the repository at this point in the history
  • Loading branch information
dyastremsky authored Jul 10, 2024
1 parent cb5710c commit ade066d
Show file tree
Hide file tree
Showing 12 changed files with 169 additions and 88 deletions.
4 changes: 2 additions & 2 deletions src/c++/perf_analyzer/genai-perf/README.md
Original file line number Diff line number Diff line change
Expand Up @@ -162,7 +162,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}
2. Run GenAI-Perf:

```bash
genai-perf \
genai-perf profile \
-m gpt2 \
--service-kind triton \
--backend tensorrtllm \
Expand Down Expand Up @@ -209,7 +209,7 @@ current profile run. This is disabled by default but users can easily enable it
by passing the `--generate-plots` option when running the benchmark:

```bash
genai-perf \
genai-perf profile \
-m gpt2 \
--service-kind triton \
--backend tensorrtllm \
Expand Down
4 changes: 2 additions & 2 deletions src/c++/perf_analyzer/genai-perf/docs/embeddings.md
Original file line number Diff line number Diff line change
Expand Up @@ -60,7 +60,7 @@ docker run -it --net=host --rm --gpus=all vllm/vllm-openai:latest --model intflo
To profile embeddings models using GenAI-Perf, use the following command:

```bash
genai-perf \
genai-perf profile \
-m intfloat/e5-mistral-7b-instruct \
--service-kind openai \
--endpoint-type embeddings \
Expand All @@ -73,7 +73,7 @@ additional arguments with the `--extra-inputs` [flag](../README.md#input-options
For example, you could use this command:

```bash
genai-perf \
genai-perf profile \
-m intfloat/e5-mistral-7b-instruct \
--service-kind openai \
--endpoint-type embeddings \
Expand Down
2 changes: 1 addition & 1 deletion src/c++/perf_analyzer/genai-perf/docs/lora.md
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,7 @@ When profiling with multiple models, you can specify how the models should be
assigned to prompts using the `--model-selection-strategy` option:

```bash
genai-perf \
genai-perf profile \
-m lora_adapter1 lora_adapter2 lora_adapter3 \
--model-selection-strategy round_robin
```
Expand Down
2 changes: 1 addition & 1 deletion src/c++/perf_analyzer/genai-perf/docs/rankings.md
Original file line number Diff line number Diff line change
Expand Up @@ -74,7 +74,7 @@ docker run --gpus all -p 8080:80 -v $volume:/data --pull always ghcr.io/huggingf
To profile ranking models using GenAI-Perf, use the following command:

```bash
genai-perf \
genai-perf profile \
-m BAAI/bge-reranker-large \
--service-kind openai \
--endpoint-type rankings \
Expand Down
8 changes: 4 additions & 4 deletions src/c++/perf_analyzer/genai-perf/docs/tutorial.md
Original file line number Diff line number Diff line change
Expand Up @@ -82,7 +82,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}
2. Run GenAI-Perf:

```bash
genai-perf \
genai-perf profile \
-m gpt2 \
--service-kind triton \
--backend tensorrtllm \
Expand Down Expand Up @@ -166,7 +166,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}
2. Run GenAI-Perf:

```bash
genai-perf \
genai-perf profile \
-m gpt2 \
--service-kind triton \
--backend vllm \
Expand Down Expand Up @@ -232,7 +232,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}
2. Run GenAI-Perf:

```bash
genai-perf \
genai-perf profile \
-m gpt2 \
--service-kind openai \
--endpoint v1/chat/completions \
Expand Down Expand Up @@ -296,7 +296,7 @@ docker run -it --net=host --rm --gpus=all nvcr.io/nvidia/tritonserver:${RELEASE}
2. Run GenAI-Perf:

```bash
genai-perf \
genai-perf profile \
-m gpt2 \
--service-kind openai \
--endpoint v1/completions \
Expand Down
114 changes: 76 additions & 38 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -61,6 +61,14 @@ def to_lowercase(self):
return self.name.lower()


class Subcommand(Enum):
    """Names of the subcommands registered on the genai-perf parser."""

    PROFILE = auto()
    COMPARE = auto()

    def to_lowercase(self):
        """Return the member name in lowercase, e.g. `PROFILE` -> `profile`."""
        return str.lower(self.name)


logger = logging.getLogger(__name__)

_endpoint_type_map = {
Expand All @@ -77,7 +85,7 @@ def _check_model_args(
"""
Check if model name is provided.
"""
if not args.subcommand and not args.model:
if not args.model:
parser.error("The -m/--model option is required and cannot be empty.")
args = _convert_str_to_enum_entry(
args, "model_selection_strategy", ModelSelectionStrategy
Expand All @@ -102,9 +110,8 @@ def _check_compare_args(
"""
Check compare subcommand args
"""
if args.subcommand == "compare":
if not args.config and not args.files:
parser.error("Either the --config or --files option must be specified.")
if not args.config and not args.files:
parser.error("Either the --config or --files option must be specified.")
return args


Expand Down Expand Up @@ -573,13 +580,6 @@ def _add_other_args(parser):
help="An option to enable verbose mode.",
)

other_group.add_argument(
"--version",
action="version",
version="%(prog)s " + __version__,
help=f"An option to print the version and exit.",
)


def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict:
request_inputs = {}
Expand Down Expand Up @@ -626,10 +626,10 @@ def get_extra_inputs_as_dict(args: argparse.Namespace) -> dict:

def _parse_compare_args(subparsers) -> argparse.ArgumentParser:
compare = subparsers.add_parser(
"compare",
Subcommand.COMPARE.to_lowercase(),
description="Subcommand to generate plots that compare multiple profile runs.",
)
compare_group = compare.add_argument_group("Compare")
compare_group = compare.add_argument_group("Input")
mx_group = compare_group.add_mutually_exclusive_group(required=False)
mx_group.add_argument(
"--config",
Expand All @@ -651,6 +651,20 @@ def _parse_compare_args(subparsers) -> argparse.ArgumentParser:
return compare


def _parse_profile_args(subparsers) -> argparse.ArgumentParser:
    """
    Register the `profile` subcommand and return its parser.

    The subcommand receives the endpoint, input, profile, output and other
    argument groups (added in that order) and dispatches to
    `profile_handler` through the `func` default.
    """
    profile_parser = subparsers.add_parser(
        Subcommand.PROFILE.to_lowercase(),
        description="Subcommand to profile LLMs and Generative AI models.",
    )

    # Conceptually grouped arguments, registered in display order.
    group_adders = (
        _add_endpoint_args,
        _add_input_args,
        _add_profile_args,
        _add_output_args,
        _add_other_args,
    )
    for add_group in group_adders:
        add_group(profile_parser)

    profile_parser.set_defaults(func=profile_handler)
    return profile_parser


### Handlers ###


Expand All @@ -659,12 +673,6 @@ def create_compare_dir() -> None:
os.mkdir(DEFAULT_COMPARE_DIR)


def profile_handler(args, extra_args):
from genai_perf.wrapper import Profiler

Profiler.run(args=args, extra_args=extra_args)


def compare_handler(args: argparse.Namespace):
"""Handles `compare` subcommand workflow."""
if args.files:
Expand All @@ -679,45 +687,75 @@ def compare_handler(args: argparse.Namespace):
plot_manager.generate_plots()


### Entrypoint ###
def profile_handler(args, extra_args):
    """Handles `profile` subcommand workflow."""
    # Imported inside the function rather than at module level;
    # NOTE(review): presumably to defer loading the wrapper module until
    # profiling is actually requested -- confirm before moving it.
    from genai_perf.wrapper import Profiler

    run_profiler = Profiler.run
    run_profiler(args=args, extra_args=extra_args)

def parse_args():
argv = sys.argv

### Parser Initialization ###


def init_parsers():
parser = argparse.ArgumentParser(
prog="genai-perf",
description="CLI to profile LLMs and Generative AI models with Perf Analyzer",
formatter_class=argparse.ArgumentDefaultsHelpFormatter,
)
parser.set_defaults(func=profile_handler)

# Conceptually group args for easier visualization
_add_endpoint_args(parser)
_add_input_args(parser)
_add_profile_args(parser)
_add_output_args(parser)
_add_other_args(parser)
parser.add_argument(
"--version",
action="version",
version="%(prog)s " + __version__,
help=f"An option to print the version and exit.",
)

# Add subcommands
subparsers = parser.add_subparsers(
help="List of subparser commands.", dest="subcommand"
)
compare_parser = _parse_compare_args(subparsers)
_ = _parse_compare_args(subparsers)
_ = _parse_profile_args(subparsers)
subparsers.required = True

return parser

# Check for passthrough args

def get_passthrough_args_index(argv: list) -> int:
    """
    Locate the `--` separator that precedes passthrough arguments.

    Args:
        argv: The full argument vector (for example `sys.argv`).

    Returns:
        The index of the first `--` entry in `argv`, or `len(argv)` when no
        separator is present. Either way, `argv[:index]` holds only the
        arguments that appear before the separator.
    """
    try:
        # Single scan: list.index raises ValueError when "--" is absent.
        passthrough_index = argv.index("--")
    except ValueError:
        return len(argv)

    # Lazy %-style arguments: the message is only built if INFO is enabled.
    logger.info("Detected passthrough args: %s", argv[passthrough_index + 1 :])
    return passthrough_index


def refine_args(
    parser: argparse.ArgumentParser, args: argparse.Namespace
) -> argparse.Namespace:
    """
    Run the subcommand-specific checks and post-processing on parsed args.

    Args:
        parser: Parser handed to the check helpers that take one.
        args: Namespace produced by parsing the command line; its
            `subcommand` attribute selects which checks run.

    Returns:
        The namespace returned by the last helper applied.

    Raises:
        ValueError: If `args.subcommand` is neither `profile` nor `compare`.
    """
    chosen = args.subcommand

    if chosen == Subcommand.COMPARE.to_lowercase():
        return _check_compare_args(parser, args)

    if chosen != Subcommand.PROFILE.to_lowercase():
        raise ValueError(f"Unknown subcommand: {chosen}")

    # Profile pipeline: each step consumes the namespace returned by the
    # previous one, so the order is significant.
    refined = _infer_prompt_source(args)
    refined = _check_model_args(parser, refined)
    refined = _check_conditional_args(parser, refined)
    refined = _check_load_manager_args(refined)
    refined = _set_artifact_paths(refined)
    return refined


### Entrypoint ###


def parse_args():
argv = sys.argv

parser = init_parsers()
passthrough_index = get_passthrough_args_index(argv)
args = parser.parse_args(argv[1:passthrough_index])
args = _infer_prompt_source(args)
args = _check_model_args(parser, args)
args = _check_conditional_args(parser, args)
args = _check_compare_args(compare_parser, args)
args = _check_load_manager_args(args)
args = _set_artifact_paths(args)
args = refine_args(parser, args)

return args, argv[passthrough_index + 1 :]
12 changes: 6 additions & 6 deletions src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,7 @@
# For all cases but vllm_openai, it assumes that the server will be on port 9999
#
# This script will run a sweep of all combinations of values in the testing matrix
# by appending those options on to the genai-pa base command
# by appending those options on to the genai-perf base command
#


Expand All @@ -20,11 +20,11 @@
]

base_commands = {
"nim_chat": "genai-perf -s 999 -p 20000 -m llama-2-7b-chat -u http://localhost:9999 --service-kind openai --endpoint-type chat",
"nim_completions": "genai-perf -s 999 -p 20000 -m llama-2-7b -u http://localhost:9999 --service-kind openai --endpoint-type completions",
"vllm_openai": "genai-perf -s 999 -p 20000 -m mistralai/Mistral-7B-v0.1 --service-kind openai --endpoint-type chat",
"triton_tensorrtllm": "genai-perf -s 999 -p 20000 -m llama-2-7b -u 0.0.0.0:9999 --service-kind triton --backend tensorrtllm",
"triton_vllm": "genai-perf -s 999 -p 20000 -m gpt2_vllm --service-kind triton --backend vllm",
"nim_chat": "genai-perf profile -s 999 -p 20000 -m llama-2-7b-chat -u http://localhost:9999 --service-kind openai --endpoint-type chat",
"nim_completions": "genai-perf profile -s 999 -p 20000 -m llama-2-7b -u http://localhost:9999 --service-kind openai --endpoint-type completions",
"vllm_openai": "genai-perf profile -s 999 -p 20000 -m mistralai/Mistral-7B-v0.1 --service-kind openai --endpoint-type chat",
"triton_tensorrtllm": "genai-perf profile -s 999 -p 20000 -m llama-2-7b -u 0.0.0.0:9999 --service-kind triton --backend tensorrtllm",
"triton_vllm": "genai-perf profile -s 999 -p 20000 -m gpt2_vllm --service-kind triton --backend vllm",
}
testname = ""

Expand Down
Loading

0 comments on commit ade066d

Please sign in to comment.