diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py index 09686f38f..8726fb454 100644 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py old mode 100755 new mode 100644 index 236891b78..76cf3f459 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py @@ -1,4 +1,3 @@ -#!/usr/bin/env python3 # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -27,11 +26,35 @@ import argparse import logging +from pathlib import Path from genai_pa.constants import LOGGER_NAME logger = logging.getLogger(LOGGER_NAME) + +def prune_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser: + """ + Prune the parsed arguments to remove args with None or False values. + """ + print(args) + return argparse.Namespace( + **{k: v for k, v in vars(args).items() if v is not None if v is not False} + ) + + +def update_load_manager_args(args: argparse.ArgumentParser) -> argparse.ArgumentParser: + """ + Update GenAI-PA load manager attributes to PA format + """ + for attr_key in ["concurrency", "request_rate"]: + attr_val = getattr(args, attr_key) + if attr_val is not None: + setattr(args, f"{attr_key}_range", f"{attr_val}") + delattr(args, attr_key) + return args + + ### Handlers ### @@ -39,16 +62,16 @@ def handler(args): from genai_pa.wrapper import Profiler - Profiler.run( - model=args.model, - ) + Profiler.run(model=args.model, args=args) ### Parsers ### def add_model_args(parser): - parser.add_argument( + model_group = parser.add_argument_group("Model") + + model_group.add_argument( "-m", "--model", type=str, @@ -58,64 +81,114 @@ def add_model_args(parser): def add_profile_args(parser): - parser.add_argument( + profile_group = parser.add_argument_group("Profiling") + load_management_group = profile_group.add_mutually_exclusive_group() + + profile_group.add_argument( "-b", "--batch-size", type=int, default=1, required=False, - help="The batch size / concurrency to benchmark. (Default: 1)", + help="The batch size to benchmark. The default value is 1.", ) - parser.add_argument( - "--input-length", + load_management_group.add_argument( + "--concurrency", type=int, - default=128, required=False, - help="The input length (tokens) to use for benchmarking LLMs. (Default: 128)", + help="Sets the concurrency value to benchmark.", ) - parser.add_argument( - "--output-length", + profile_group.add_argument( + "--max-threads", type=int, - default=128, + default=16, + required=False, + help="Sets the maximum number of threads that will be " + "created for providing desired concurrency or request rate. " + "The default value is 16.", + ) + # TODO: necessary? + # parser.add_argument( + # "--output-length", + # type=int, + # default=128, + # required=False, + # help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)", + # ) + profile_group.add_argument( + "--profile-export-file", + type=Path, + default="profile_export.json", + help="Specifies the path where the profile export will be " + "generated. By default, the profile export will not be " + "generated.", + ) + load_management_group.add_argument( + "--request-rate", + type=float, + required=False, + help="Sets the request rate for the load generated by PA. ", + ) + profile_group.add_argument( + "--service-kind", + type=str, + choices=["triton", "openai"], + default="triton", + required=False, + help="Describes the kind of service perf_analyzer will " + 'generate load for. The options are "triton" and ' + '"openai". The default value is "triton".', + ) + profile_group.add_argument( + "--streaming", + action="store_true", + required=False, + help=f"Enables the use of the streaming API.", + ) + profile_group.add_argument( + "--version", + action="store_true", required=False, - help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)", + help=f"Prints the version and exits.", ) def add_endpoint_args(parser): - parser.add_argument( + endpoint_group = parser.add_argument_group("Endpoint") + + endpoint_group.add_argument( + "-u", "--url", type=str, default="localhost:8001", required=False, + dest="u", + metavar="URL", help="URL of the endpoint to target for benchmarking.", ) - parser.add_argument( - "--provider", - type=str, - choices=["triton", "openai"], - required=False, - help="Provider format/schema to use for benchmarking.", - ) def add_dataset_args(parser): - parser.add_argument( - "--dataset", - type=str, - default="OpenOrca", - choices=["OpenOrca", "cnn_dailymail"], - required=False, - help="HuggingFace dataset to use for the benchmark.", - ) - parser.add_argument( - "--tokenizer", - type=str, - default="auto", - choices=["auto"], - required=False, - help="The HuggingFace tokenizer to use to interpret token metrics from final text results", - ) + pass + + dataset_group = parser.add_argument_group("Dataset") + # TODO: Do we want to remove dataset and tokenizer? + # dataset_group.add_argument( + # "--dataset", + # type=str, + # default="OpenOrca", + # choices=["OpenOrca", "cnn_dailymail"], + # required=False, + # help="HuggingFace dataset to use for the benchmark.", + # ) + # dataset_group.add_argument( + # "--tokenizer", + # type=str, + # default="auto", + # choices=["auto"], + # required=False, + # help="The HuggingFace tokenizer to use to interpret token metrics from final text results", + # ) ### Entrypoint ### @@ -125,22 +198,19 @@ def add_dataset_args(parser): def parse_args(argv=None): parser = argparse.ArgumentParser( prog="genai-pa", - description="CLI to profile LLMs and Generative AI models with PA", + description="CLI to profile LLMs and Generative AI models with Perf Analyzer", ) parser.set_defaults(func=handler) # Conceptually group args for easier visualization - model_group = parser.add_argument_group("Model") - add_model_args(model_group) + add_model_args(parser) + add_profile_args(parser) + add_endpoint_args(parser) + add_dataset_args(parser) - profile_group = parser.add_argument_group("Profiling") - add_profile_args(profile_group) + args = parser.parse_args(argv) - endpoint_group = parser.add_argument_group("Endpoint") - add_endpoint_args(endpoint_group) + args = update_load_manager_args(args) + args = prune_args(args) - dataset_group = parser.add_argument_group("Dataset") - add_dataset_args(dataset_group) - - args = parser.parse_args(argv) return args diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py new file mode 100644 index 000000000..7f2fd8f82 --- /dev/null +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py @@ -0,0 +1,36 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +from pathlib import Path + + +def remove_file(file: Path): + if file.is_file(): + file.unlink() + + +def convert_option_name(name: str) -> str: + return name.replace("_", "-") diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py b/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py index 8e466b1fd..49ca35aef 100644 --- a/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py +++ b/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py @@ -24,10 +24,10 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import json import logging import subprocess +import genai_pa.utils as utils from genai_pa.constants import LOGGER_NAME logger = logging.getLogger(LOGGER_NAME) @@ -35,22 +35,26 @@ class Profiler: @staticmethod - def run(model): - # TODO: Replace with other plumbing - input_file = "/tmp/input_data.json" - with open(input_file, "w") as f: - data = {"data": [{"text_input": ["hi"]}]} - json.dump(data, f) + def run(model, args=None): + skip_args = ["model", "func"] + if hasattr(args, "version"): + cmd = f"perf_analyzer --version" + else: + utils.remove_file(args.profile_export_file) - cmd = [ - "perf_analyzer", - "-i", - "grpc", - "--streaming", - "-m", - model, - "--input-data", - input_file, - ] + cmd = f"perf_analyzer -m {model} --async " + for arg, value in vars(args).items(): + if arg in skip_args: + pass + elif value is True: + cmd += f"--{arg} " + elif arg == "batch_size": + cmd += f"-b {value} " + else: + if len(arg) == 1: + cmd += f"-{arg} {value}" + else: + arg = utils.convert_option_name(arg) + cmd += f"--{arg} {value} " logger.info(f"Running Perf Analyzer : '{cmd}'") - subprocess.run(cmd) + subprocess.run(cmd, shell=True)