Skip to content

Commit

Permalink
Add additional Cli parsing (#474)
Browse files Browse the repository at this point in the history
* Adding subprocess call and more arg parsing

* Add url option

* Remove input length option

* Update help messages, mistakes in args, removed sync and async options, updated version option behavior, cleaned up code

* Refactored code to clean things up

* Use metavar and dest in cli to help with url option
  • Loading branch information
debermudez authored Feb 28, 2024
1 parent 15a5bee commit 820539b
Show file tree
Hide file tree
Showing 4 changed files with 178 additions and 69 deletions.
1 change: 0 additions & 1 deletion src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down
170 changes: 120 additions & 50 deletions src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -27,28 +26,52 @@

import argparse
import logging
from pathlib import Path

from genai_pa.constants import LOGGER_NAME

logger = logging.getLogger(LOGGER_NAME)


def prune_args(args: argparse.Namespace) -> argparse.Namespace:
    """
    Prune the parsed arguments to remove args with None or False values.

    Args:
        args: Namespace of parsed command line arguments.

    Returns:
        A new Namespace holding only the attributes whose value is neither
        ``None`` nor ``False``. The checks are identity checks, so other
        falsy values such as ``0`` or ``""`` are kept. ``args`` itself is
        not modified.
    """
    return argparse.Namespace(
        **{
            key: value
            for key, value in vars(args).items()
            if value is not None and value is not False
        }
    )


def update_load_manager_args(args: argparse.Namespace) -> argparse.Namespace:
    """
    Update GenAI-PA load manager attributes to PA format

    For each of ``concurrency`` and ``request_rate`` that is set (not
    ``None``), the value is stored as a string under ``<name>_range`` and
    the original attribute is removed.

    Args:
        args: Namespace of parsed command line arguments. Modified in place.

    Returns:
        The same Namespace object that was passed in.
    """
    for attr_key in ("concurrency", "request_rate"):
        # A default of None keeps this safe for namespaces that never
        # defined the option at all.
        attr_val = getattr(args, attr_key, None)
        if attr_val is not None:
            setattr(args, f"{attr_key}_range", f"{attr_val}")
            delattr(args, attr_key)
    return args


### Handlers ###


# NOTE: Placeholder
def handler(args):
    """
    Default handler: run the profiler for the parsed arguments.

    Args:
        args: Parsed arguments. ``args.model`` is passed as the model and
            the whole object is forwarded as ``args``.
    """
    # Function-scope import, kept as in the original placeholder.
    from genai_pa.wrapper import Profiler

    # Exactly one invocation: the model plus the full namespace.
    Profiler.run(model=args.model, args=args)


### Parsers ###


def add_model_args(parser):
parser.add_argument(
model_group = parser.add_argument_group("Model")

model_group.add_argument(
"-m",
"--model",
type=str,
Expand All @@ -58,64 +81,114 @@ def add_model_args(parser):


def add_profile_args(parser):
    """
    Register the "Profiling" argument group on ``parser``.

    ``--concurrency`` and ``--request-rate`` are placed in a mutually
    exclusive group, so at most one of them may be given.

    Args:
        parser: The argparse parser (or container) to add the group to.
    """
    profile_group = parser.add_argument_group("Profiling")
    load_management_group = profile_group.add_mutually_exclusive_group()

    profile_group.add_argument(
        "-b",
        "--batch-size",
        type=int,
        default=1,
        required=False,
        help="The batch size to benchmark. The default value is 1.",
    )
    load_management_group.add_argument(
        "--concurrency",
        type=int,
        required=False,
        help="Sets the concurrency value to benchmark.",
    )
    profile_group.add_argument(
        "--max-threads",
        type=int,
        default=16,
        required=False,
        help="Sets the maximum number of threads that will be "
        "created for providing desired concurrency or request rate. "
        "The default value is 16.",
    )
    # TODO: necessary?
    # parser.add_argument(
    #     "--output-length",
    #     type=int,
    #     default=128,
    #     required=False,
    #     help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
    # )
    profile_group.add_argument(
        "--profile-export-file",
        type=Path,
        default=Path("profile_export.json"),
        # The help text must agree with the default above: an export path
        # is always set.
        help="Specifies the path where the profile export will be "
        'generated. The default value is "profile_export.json".',
    )
    load_management_group.add_argument(
        "--request-rate",
        type=float,
        required=False,
        help="Sets the request rate for the load generated by PA. ",
    )
    profile_group.add_argument(
        "--service-kind",
        type=str,
        choices=["triton", "openai"],
        default="triton",
        required=False,
        help="Describes the kind of service perf_analyzer will "
        'generate load for. The options are "triton" and '
        '"openai". The default value is "triton".',
    )
    profile_group.add_argument(
        "--streaming",
        action="store_true",
        required=False,
        help="Enables the use of the streaming API.",
    )
    profile_group.add_argument(
        "--version",
        action="store_true",
        required=False,
        help="Prints the version and exits.",
    )


def add_endpoint_args(parser):
    """
    Register the "Endpoint" argument group on ``parser``.

    Args:
        parser: The argparse parser (or container) to add the group to.
    """
    endpoint_group = parser.add_argument_group("Endpoint")

    endpoint_group.add_argument(
        "-u",
        "--url",
        type=str,
        default="localhost:8001",
        required=False,
        # The value is stored as ``u`` rather than ``url``: Profiler.run in
        # wrapper.py emits single-character attribute names as short
        # options, so this is forwarded to perf_analyzer as ``-u <value>``.
        dest="u",
        metavar="URL",
        help="URL of the endpoint to target for benchmarking.",
    )


def add_dataset_args(parser):
    """
    Register the "Dataset" argument group on ``parser``.

    The group currently has no options; the former ``--dataset`` and
    ``--tokenizer`` options are kept below as commented-out code pending
    the TODO.

    Args:
        parser: The argparse parser (or container) to add the group to.
    """
    dataset_group = parser.add_argument_group("Dataset")
    # TODO: Do we want to remove dataset and tokenizer?
    # dataset_group.add_argument(
    #     "--dataset",
    #     type=str,
    #     default="OpenOrca",
    #     choices=["OpenOrca", "cnn_dailymail"],
    #     required=False,
    #     help="HuggingFace dataset to use for the benchmark.",
    # )
    # dataset_group.add_argument(
    #     "--tokenizer",
    #     type=str,
    #     default="auto",
    #     choices=["auto"],
    #     required=False,
    #     help="The HuggingFace tokenizer to use to interpret token metrics from final text results",
    # )


### Entrypoint ###
Expand All @@ -125,22 +198,19 @@ def add_dataset_args(parser):
def parse_args(argv=None):
    """
    Build the genai-pa command line parser and parse ``argv``.

    Args:
        argv: Argument list to parse, passed straight to
            ``ArgumentParser.parse_args``.

    Returns:
        The parsed namespace after ``update_load_manager_args`` and
        ``prune_args`` have been applied. ``func`` is set to ``handler``
        through the parser defaults.
    """
    parser = argparse.ArgumentParser(
        prog="genai-pa",
        description="CLI to profile LLMs and Generative AI models with Perf Analyzer",
    )
    parser.set_defaults(func=handler)

    # Conceptually group args for easier visualization
    add_model_args(parser)
    add_profile_args(parser)
    add_endpoint_args(parser)
    add_dataset_args(parser)

    args = parser.parse_args(argv)

    # Post-process: rename load manager options, then drop unset values.
    args = update_load_manager_args(args)
    args = prune_args(args)

    return args
36 changes: 36 additions & 0 deletions src/c++/perf_analyzer/genai-pa/genai_pa/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from pathlib import Path


def remove_file(file: Path):
    """
    Delete ``file`` if it is an existing regular file; otherwise do nothing.
    """
    if not file.is_file():
        return
    file.unlink()


def convert_option_name(name: str) -> str:
    """
    Return ``name`` with every underscore replaced by a hyphen.
    """
    pieces = name.split("_")
    return "-".join(pieces)
40 changes: 22 additions & 18 deletions src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,33 +24,37 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import logging
import subprocess

import genai_pa.utils as utils
from genai_pa.constants import LOGGER_NAME

logger = logging.getLogger(LOGGER_NAME)


class Profiler:
    """
    Builds a ``perf_analyzer`` command line from parsed arguments and runs it.
    """

    @staticmethod
    def _build_cmd(model, args=None):
        """
        Translate ``model`` and ``args`` into a perf_analyzer argument list.

        Each option and each value is a separate list element, so no two
        options can run together and no shell quoting is required.
        """
        if hasattr(args, "version"):
            return ["perf_analyzer", "--version"]

        skip_args = ("model", "func")
        cmd = ["perf_analyzer", "-m", f"{model}", "--async"]
        options = vars(args) if args is not None else {}
        for arg, value in options.items():
            if arg in skip_args:
                continue
            if arg == "batch_size" and value is not True:
                cmd += ["-b", f"{value}"]
            elif len(arg) == 1:
                # Single-character names are short options, e.g. ``-u``.
                cmd += [f"-{arg}"] if value is True else [f"-{arg}", f"{value}"]
            else:
                option = f"--{utils.convert_option_name(arg)}"
                # ``True`` marks a store_true flag: emit the bare option.
                cmd += [option] if value is True else [option, f"{value}"]
        return cmd

    @staticmethod
    def run(model, args=None):
        """
        Run perf_analyzer for ``model`` with the options found in ``args``.

        If ``args`` has a ``version`` attribute, only
        ``perf_analyzer --version`` is run. Otherwise any existing profile
        export file is removed first and every attribute of ``args`` except
        ``model`` and ``func`` is forwarded as a command line option.

        Args:
            model: Model name, passed to perf_analyzer via ``-m``.
            args: Parsed argument namespace, or ``None`` to run with only
                the model.
        """
        if not hasattr(args, "version"):
            export_file = getattr(args, "profile_export_file", None)
            if export_file is not None:
                utils.remove_file(export_file)

        cmd = Profiler._build_cmd(model, args)
        logger.info(f"Running Perf Analyzer : '{' '.join(cmd)}'")
        # An argument list without a shell: values are never re-parsed, so
        # shell metacharacters in the model name or URL cannot inject commands.
        subprocess.run(cmd)

0 comments on commit 820539b

Please sign in to comment.