Skip to content

Commit

Permalink
Update help messages, mistakes in args, removed sync and async option…
Browse files Browse the repository at this point in the history
…s, updated version option behavior, cleaned up code
  • Loading branch information
debermudez committed Feb 28, 2024
1 parent 1dd9cb6 commit 80849d1
Show file tree
Hide file tree
Showing 5 changed files with 90 additions and 79 deletions.
1 change: 0 additions & 1 deletion src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down
2 changes: 2 additions & 0 deletions src/c++/perf_analyzer/genai-pa/genai_pa/main.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,6 +28,7 @@
import logging
import sys

import genai_pa.utils as utils
from genai_pa import parser
from genai_pa.constants import LOGGER_NAME

Expand All @@ -40,6 +41,7 @@
# Optional argv used for testing - will default to sys.argv if None.
def run(argv=None):
    """
    Parse the command line and dispatch to the selected handler.

    `argv` is handed straight to `parser.parse_args`. Before the handler
    stored in `func` is invoked, the file named by `profile_export_file`
    is passed to `utils.remove_file`.
    """
    parsed_args = parser.parse_args(argv)

    # Clear the export target first, then hand control to the handler.
    export_path = parsed_args.profile_export_file
    utils.remove_file(export_path)

    handler = parsed_args.func
    handler(parsed_args)


Expand Down
101 changes: 36 additions & 65 deletions src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
100755 → 100644
Original file line number Diff line number Diff line change
@@ -1,4 +1,3 @@
#!/usr/bin/env python3
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
Expand Down Expand Up @@ -27,11 +26,22 @@

import argparse
import logging
from pathlib import Path

from genai_pa.constants import LOGGER_NAME

logger = logging.getLogger(LOGGER_NAME)


def prune_args(args: argparse.Namespace) -> argparse.Namespace:
    """
    Prune the parsed arguments to remove args with None or False values.

    A new Namespace is returned; the one passed in is left untouched.
    The filter uses identity checks, so only the literal `None` and `False`
    objects are dropped. Other falsy values such as `0` or `""` are kept.
    """
    kept = {
        key: value
        for key, value in vars(args).items()
        if value is not None and value is not False
    }
    return argparse.Namespace(**kept)


### Handlers ###


Expand All @@ -55,28 +65,15 @@ def add_model_args(parser):
)


def add_profile_args(parser):
parser.add_argument(
"--async",
action="store_true",
required=False,
help=f"Enables asynchronous mode in perf_analyzer. "
"By default, perf_analyzer will use synchronous API to "
"request inference. However, if the model is sequential, "
"then default mode is asynchronous. Specify --sync to "
"operate sequential models in synchronous mode. In synchronous "
"mode, perf_analyzer will start threads equal to the concurrency "
"level. Use asynchronous mode to limit the number of threads, yet "
"maintain the concurrency.",
)
def add_profile_args(parser, exclusive_group):
parser.add_argument(
"-b",
type=int,
default=1,
required=False,
help="The batch size to benchmark. The default value is 1.",
)
parser.add_argument(
exclusive_group.add_argument(
"--concurrency",
type=int,
required=False,
Expand All @@ -92,70 +89,56 @@ def add_profile_args(parser):
"However, when running in synchronous mode,this value will be ignored. "
"The default value is 16.",
)
parser.add_argument(
"--output-length",
type=int,
default=128,
required=False,
help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
)
# TODO: necessary?
# parser.add_argument(
# "--output-length",
# type=int,
# default=128,
# required=False,
# help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
# )
parser.add_argument(
"--profile-export-file",
type=str,
required=False,
help="Specifies the path that the profile export will be "
"generated at. By default, the profile export will not be "
type=Path,
default="profile_export.json",
help="Specifies the path where the profile export will be "
"generated. By default, the profile export will not be "
"generated.",
)
parser.add_argument(
exclusive_group.add_argument(
"--request-rate",
type=float,
required=False,
help="Sets the request rates for load generated by analyzer. ",
help="Sets the request rate for the load generated by PA. ",
)
parser.add_argument(
"--service-kind",
type=str,
choices=["triton", "openai"],
default="triton",
required=False,
help="Sets the request rates for load generated by analyzer. "
"Describes the kind of service perf_analyzer to "
help="Describes the kind of service perf_analyzer will "
'generate load for. The options are "triton" and '
'"openai". The default value is "triton".',
)
parser.add_argument(
"--streaming",
action="store_true",
required=False,
help=f"Enables the use of streaming API. This flag is "
"only valid with gRPC protocol. By default, it is set false.",
)
parser.add_argument(
"--sync",
action="store_true",
required=False,
help=f"Enables asynchronous mode in perf_analyzer. "
"By default, perf_analyzer will use synchronous API to "
"request inference. However, if the model is sequential, "
"then default mode is asynchronous. Specify --sync to "
"operate sequential models in synchronous mode. In synchronous "
"mode, perf_analyzer will start threads equal to the concurrency "
"level. Use asynchronous mode to limit the number of threads, yet "
"maintain the concurrency.",
help=f"Enables the use of the streaming API.",
)
parser.add_argument(
"--version",
action="store_true",
required=False,
help=f"Enables the printing of the current version of perf_analyzer. "
"By default, it is set false.",
help=f"Prints the version and exits. By default, it is set false.",
)


def add_endpoint_args(parser):
parser.add_argument(
"--u",
"-u",
"--url",
type=str,
default="localhost:8001",
required=False,
Expand Down Expand Up @@ -200,7 +183,8 @@ def parse_args(argv=None):
add_model_args(model_group)

profile_group = parser.add_argument_group("Profiling")
add_profile_args(profile_group)
load_management_group = profile_group.add_mutually_exclusive_group()
add_profile_args(profile_group, load_management_group)

endpoint_group = parser.add_argument_group("Endpoint")
add_endpoint_args(endpoint_group)
Expand All @@ -210,25 +194,12 @@ def parse_args(argv=None):

args = parser.parse_args(argv)

# Concurrency and request rate are mutually exclusive
# TODO: Review if there is a cleaner way to do this with argparse
if args.concurrency is not None and args.request_rate is not None:
parser.error(
"Arguments --concurrency and --request_rate are mutually exclusive."
)

if args.concurrency is None and args.request_rate is None:
args.concurrency = 1
print(
"Neither --concurrency nor --request_rate provided. Setting concurrency to 1."
)

# Update GenAI-PA non-range attributes to range format for PA
for attr_key in ["concurrency", "request_rate"]:
attr_val = getattr(args, attr_key)
if attr_val is not None:
setattr(args, f"{attr_key}_range", f"{attr_val}:{attr_val}:{attr_val}")
setattr(args, f"{attr_key}_range", f"{attr_val}:{attr_val}")
delattr(args, attr_key)

args = argparse.Namespace(**{k: v for k, v in vars(args).items() if v is not None})
args = prune_args(args)
return args
38 changes: 38 additions & 0 deletions src/c++/perf_analyzer/genai-pa/genai_pa/utils.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,38 @@
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
#
# Redistribution and use in source and binary forms, with or without
# modification, are permitted provided that the following conditions
# are met:
# * Redistributions of source code must retain the above copyright
# notice, this list of conditions and the following disclaimer.
# * Redistributions in binary form must reproduce the above copyright
# notice, this list of conditions and the following disclaimer in the
# documentation and/or other materials provided with the distribution.
# * Neither the name of NVIDIA CORPORATION nor the names of its
# contributors may be used to endorse or promote products derived
# from this software without specific prior written permission.
#
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

from pathlib import Path


def file_exists(file: Path) -> bool:
    """
    Return True when `file` refers to an existing regular file.

    `Path.is_file()` already returns False for paths that do not exist,
    so no separate `exists()` check is needed. Directories yield False.
    """
    return file.is_file()


def remove_file(file: Path):
    """
    Delete `file` if `file_exists` reports it as present.

    Nothing happens when the check fails.
    """
    if not file_exists(file):
        return
    file.unlink()
27 changes: 14 additions & 13 deletions src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
Original file line number Diff line number Diff line change
Expand Up @@ -24,7 +24,6 @@
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.

import json
import logging
import subprocess

Expand All @@ -35,19 +34,21 @@

class Profiler:
    """Builds a perf_analyzer command line and launches it."""

    @staticmethod
    def run(model, args=None):
        """
        Assemble a perf_analyzer command from `args` and run it in a shell.

        When `args` carries a `version` attribute, only
        `perf_analyzer --version` is run. Otherwise the command starts with
        `-m <model> --async` and every attribute of `args` is appended:
        entries named in `skip_args` are not forwarded, values that are
        `True` become bare `--<name>` flags, `url` is emitted as `-u <value>`,
        and anything else becomes `--<name> <value>`.

        The command's exit status is not inspected.
        """
        skip_args = ["model", "func"]

        if hasattr(args, "version"):
            cmd = "perf_analyzer --version"
        else:
            cmd = f"perf_analyzer -m {model} --async "
            # `args` defaults to None; treat that as "no extra options"
            # instead of failing inside vars().
            options = vars(args) if args is not None else {}
            for arg, value in options.items():
                if arg in skip_args:
                    continue
                if value is True:
                    cmd += f"--{arg} "
                elif arg == "url":
                    # Compare by equality: `is` on a string literal tests
                    # object identity and is not guaranteed to match.
                    cmd += f"-u {value} "
                else:
                    cmd += f"--{arg} {value} "

        logger.info(f"Running Perf Analyzer : '{cmd}'")
        # NOTE(review): the command is a single shell string built from
        # unquoted argument values and executed with shell=True.
        subprocess.run(cmd, shell=True)

0 comments on commit 80849d1

Please sign in to comment.