diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
index 09686f38f..8726fb454 100644
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
index 79748df50..46b529f9b 100755
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
@@ -28,6 +28,7 @@
 import logging
 import sys
 
+import genai_pa.utils as utils
 from genai_pa import parser
 from genai_pa.constants import LOGGER_NAME
 
@@ -40,6 +41,7 @@
 # Optional argv used for testing - will default to sys.argv if None.
 def run(argv=None):
     args = parser.parse_args(argv)
+    utils.remove_file(args.profile_export_file)
     args.func(args)
 
 
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
old mode 100755
new mode 100644
index 160516090..9d3cf1abe
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,11 +26,22 @@
 
 import argparse
 import logging
+from pathlib import Path
 
 from genai_pa.constants import LOGGER_NAME
 
 logger = logging.getLogger(LOGGER_NAME)
 
+
+def prune_args(args: argparse.Namespace) -> argparse.Namespace:
+    """
+    Prune the parsed arguments to remove args with None or False values.
+    """
+    return argparse.Namespace(
+        **{k: v for k, v in vars(args).items() if v is not None and v is not False}
+    )
+
+
 ### Handlers ###
 
 
@@ -55,20 +65,7 @@ def add_model_args(parser):
     )
 
 
-def add_profile_args(parser):
-    parser.add_argument(
-        "--async",
-        action="store_true",
-        required=False,
-        help=f"Enables asynchronous mode in perf_analyzer. "
-        "By default, perf_analyzer will use synchronous API to "
-        "request inference. However, if the model is sequential, "
-        "then default mode is asynchronous. Specify --sync to "
-        "operate sequential models in synchronous mode. In synchronous "
-        "mode, perf_analyzer will start threads equal to the concurrency "
-        "level. Use asynchronous mode to limit the number of threads, yet "
-        "maintain the concurrency.",
-    )
+def add_profile_args(parser, exclusive_group):
     parser.add_argument(
         "-b",
         type=int,
@@ -76,7 +73,7 @@ def add_profile_args(parser):
         required=False,
         help="The batch size to benchmark. The default value is 1.",
     )
-    parser.add_argument(
+    exclusive_group.add_argument(
         "--concurrency",
         type=int,
         required=False,
@@ -92,26 +89,27 @@ def add_profile_args(parser):
         "However, when running in synchronous mode,this value will be ignored. "
         "The default value is 16.",
     )
-    parser.add_argument(
-        "--output-length",
-        type=int,
-        default=128,
-        required=False,
-        help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
-    )
+    # TODO: necessary?
+    # parser.add_argument(
+    #     "--output-length",
+    #     type=int,
+    #     default=128,
+    #     required=False,
+    #     help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
+    # )
     parser.add_argument(
         "--profile-export-file",
-        type=str,
-        required=False,
-        help="Specifies the path that the profile export will be "
-        "generated at. By default, the profile export will not be "
-        "generated.",
+        type=Path,
+        default="profile_export.json",
+        help="Specifies the path where the profile export will be "
+        "generated. By default, the profile export is written to "
+        "profile_export.json.",
     )
-    parser.add_argument(
+    exclusive_group.add_argument(
         "--request-rate",
         type=float,
         required=False,
-        help="Sets the request rates for load generated by analyzer. ",
+        help="Sets the request rate for the load generated by PA. ",
     )
     parser.add_argument(
         "--service-kind",
@@ -119,8 +117,7 @@ def add_profile_args(parser):
         choices=["triton", "openai"],
         default="triton",
         required=False,
-        help="Sets the request rates for load generated by analyzer. "
-        "Describes the kind of service perf_analyzer to "
+        help="Describes the kind of service perf_analyzer will "
         'generate load for. The options are "triton" and '
         '"openai". The default value is "triton".',
     )
@@ -128,34 +125,20 @@ def add_profile_args(parser):
         "--streaming",
         action="store_true",
         required=False,
-        help=f"Enables the use of streaming API. This flag is "
-        "only valid with gRPC protocol. By default, it is set false.",
-    )
-    parser.add_argument(
-        "--sync",
-        action="store_true",
-        required=False,
-        help=f"Enables asynchronous mode in perf_analyzer. "
-        "By default, perf_analyzer will use synchronous API to "
-        "request inference. However, if the model is sequential, "
-        "then default mode is asynchronous. Specify --sync to "
-        "operate sequential models in synchronous mode. In synchronous "
-        "mode, perf_analyzer will start threads equal to the concurrency "
-        "level. Use asynchronous mode to limit the number of threads, yet "
-        "maintain the concurrency.",
+        help="Enables the use of the streaming API.",
     )
     parser.add_argument(
         "--version",
         action="store_true",
         required=False,
-        help=f"Enables the printing of the current version of perf_analyzer. "
-        "By default, it is set false.",
+        help="Prints the version and exits. By default, it is set false.",
     )
 
 
 def add_endpoint_args(parser):
     parser.add_argument(
-        "--u",
+        "-u",
+        "--url",
         type=str,
         default="localhost:8001",
         required=False,
@@ -200,7 +183,8 @@ def parse_args(argv=None):
     add_model_args(model_group)
 
     profile_group = parser.add_argument_group("Profiling")
-    add_profile_args(profile_group)
+    load_management_group = profile_group.add_mutually_exclusive_group()
+    add_profile_args(profile_group, load_management_group)
 
     endpoint_group = parser.add_argument_group("Endpoint")
     add_endpoint_args(endpoint_group)
@@ -210,25 +194,12 @@ def parse_args(argv=None):
 
     args = parser.parse_args(argv)
 
-    # Concurrency and request rate are mutually exclusive
-    # TODO: Review if there is a cleaner way to do this with argparse
-    if args.concurrency is not None and args.request_rate is not None:
-        parser.error(
-            "Arguments --concurrency and --request_rate are mutually exclusive."
-        )
-
-    if args.concurrency is None and args.request_rate is None:
-        args.concurrency = 1
-        print(
-            "Neither --concurrency nor --request_rate provided. Setting concurrency to 1."
-        )
-
     # Update GenAI-PA non-range attributes to range format for PA
     for attr_key in ["concurrency", "request_rate"]:
         attr_val = getattr(args, attr_key)
         if attr_val is not None:
-            setattr(args, f"{attr_key}_range", f"{attr_val}:{attr_val}:{attr_val}")
+            setattr(args, f"{attr_key}_range", f"{attr_val}:{attr_val}")
             delattr(args, attr_key)
 
-    args = argparse.Namespace(**{k: v for k, v in vars(args).items() if v is not None})
+    args = prune_args(args)
     return args
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py
new file mode 100644
index 000000000..639c0fccb
--- /dev/null
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py
@@ -0,0 +1,38 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from pathlib import Path
+
+
+def file_exists(file: Path) -> bool:
+    """Return True if `file` exists and is a regular file."""
+    return file.is_file()
+
+
+def remove_file(file: Path) -> None:
+    """Delete `file` if it exists and is a regular file; otherwise do nothing."""
+    if file_exists(file):
+        file.unlink()
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py b/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
index 2b1c72b59..019b165e4 100644
--- a/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
+++ b/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import json
 import logging
 import subprocess
 
@@ -35,19 +34,24 @@
 
 class Profiler:
     @staticmethod
-    def run(model, args):
+    def run(model, args=None):
         skip_args = ["model", "func"]
-
-        cmd = f"perf_analyzer -m {model} "
-        for arg, value in vars(args).items():
-            if value is True:
-                cmd += f"--{arg} "
-            elif value is False:
-                pass
-            elif arg in skip_args:
-                pass
-            else:
-                cmd += f"--{arg} {value} "
+        if hasattr(args, "version"):
+            cmd = "perf_analyzer --version"
+        else:
+            cmd = f"perf_analyzer -m {model} --async "
+            # `args` defaults to None, which vars() cannot inspect.
+            arg_items = vars(args).items() if args is not None else ()
+            for arg, value in arg_items:
+                if arg in skip_args:
+                    pass
+                elif value is True:
+                    cmd += f"--{arg} "
+                elif arg == "url":
+                    # Compared by value (==); the URL is emitted with the short -u flag.
+                    cmd += f"-u {value} "
+                else:
+                    cmd += f"--{arg} {value} "
 
         logger.info(f"Running Perf Analyzer : '{cmd}'")
         subprocess.run(cmd, shell=True)