Update help messages, mistakes in args, removed sync and async options, updated version option behavior, cleaned up code
triton-inference-server · Feb 28, 2024 · 80849d1 · 80849d1
1 parent 1dd9cb6
commit 80849d1
Show file tree

Hide file tree

Showing 5 changed files with 90 additions and 79 deletions.
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py b/src/c++/perf_analyzer/genai-pa/genai_pa/constants.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without

diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
@@ -28,6 +28,7 @@
 import logging
 import sys
 
+import genai_pa.utils as utils
 from genai_pa import parser
 from genai_pa.constants import LOGGER_NAME
 
@@ -40,6 +41,7 @@
 # Optional argv used for testing - will default to sys.argv if None.
 def run(argv=None):
+    # Parse the command line into a namespace of options.
     args = parser.parse_args(argv)
+    # Remove any regular file already present at the profile export path
+    # before the selected handler runs.
+    utils.remove_file(args.profile_export_file)
+    # Dispatch to the callable stored on args.func, passing it the options.
     args.func(args)
 
 

diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
@@ -1,4 +1,3 @@
-#!/usr/bin/env python3
 # Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
@@ -27,11 +26,22 @@
 
 import argparse
 import logging
+from pathlib import Path
 
 from genai_pa.constants import LOGGER_NAME
 
 logger = logging.getLogger(LOGGER_NAME)
 
+
+def prune_args(args: argparse.Namespace) -> argparse.Namespace:
+    """
+    Return a new namespace holding only the arguments that were actually set.
+
+    Entries whose value is ``None`` or ``False`` are dropped. The comparison
+    is by identity, so other falsy values such as ``0`` or ``""`` are kept.
+    The namespace passed in is not modified.
+    """
+    return argparse.Namespace(
+        **{
+            k: v
+            for k, v in vars(args).items()
+            if v is not None and v is not False
+        }
+    )
+
+
 ### Handlers ###
 
 
@@ -55,28 +65,15 @@ def add_model_args(parser):
     )
 
 
-def add_profile_args(parser):
-    parser.add_argument(
-        "--async",
-        action="store_true",
-        required=False,
-        help=f"Enables asynchronous mode in perf_analyzer. "
-        "By default, perf_analyzer will use synchronous API to "
-        "request inference. However, if the model is sequential, "
-        "then default mode is asynchronous. Specify --sync to "
-        "operate sequential models in synchronous mode. In synchronous "
-        "mode, perf_analyzer will start threads equal to the concurrency "
-        "level. Use asynchronous mode to limit the number of threads, yet "
-        "maintain the concurrency.",
-    )
+def add_profile_args(parser, exclusive_group):
     parser.add_argument(
         "-b",
         type=int,
         default=1,
         required=False,
         help="The batch size to benchmark. The default value is 1.",
     )
-    parser.add_argument(
+    exclusive_group.add_argument(
         "--concurrency",
         type=int,
         required=False,
@@ -92,70 +89,56 @@ def add_profile_args(parser):
         "However, when running in synchronous mode,this value will be ignored. "
         "The default value is 16.",
     )
-    parser.add_argument(
-        "--output-length",
-        type=int,
-        default=128,
-        required=False,
-        help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
-    )
+    # TODO: necessary?
+    # parser.add_argument(
+    #     "--output-length",
+    #     type=int,
+    #     default=128,
+    #     required=False,
+    #     help="The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
+    # )
     parser.add_argument(
         "--profile-export-file",
-        type=str,
-        required=False,
-        help="Specifies the path that the profile export will be "
-        "generated at. By default, the profile export will not be "
+        type=Path,
+        default="profile_export.json",
+        help="Specifies the path where the profile export will be "
+        "generated. By default, the profile export will not be "
         "generated.",
     )
-    parser.add_argument(
+    exclusive_group.add_argument(
         "--request-rate",
         type=float,
         required=False,
-        help="Sets the request rates for load generated by analyzer. ",
+        help="Sets the request rate for the load generated by PA. ",
     )
     parser.add_argument(
         "--service-kind",
         type=str,
         choices=["triton", "openai"],
         default="triton",
         required=False,
-        help="Sets the request rates for load generated by analyzer. "
-        "Describes the kind of service perf_analyzer to "
+        help="Describes the kind of service perf_analyzer will "
         'generate load for. The options are "triton" and '
         '"openai". The default value is "triton".',
     )
     parser.add_argument(
         "--streaming",
         action="store_true",
         required=False,
-        help=f"Enables the use of streaming API. This flag is "
-        "only valid with gRPC protocol. By default, it is set false.",
-    )
-    parser.add_argument(
-        "--sync",
-        action="store_true",
-        required=False,
-        help=f"Enables asynchronous mode in perf_analyzer. "
-        "By default, perf_analyzer will use synchronous API to "
-        "request inference. However, if the model is sequential, "
-        "then default mode is asynchronous. Specify --sync to "
-        "operate sequential models in synchronous mode. In synchronous "
-        "mode, perf_analyzer will start threads equal to the concurrency "
-        "level. Use asynchronous mode to limit the number of threads, yet "
-        "maintain the concurrency.",
+        help=f"Enables the use of the streaming API.",
     )
     parser.add_argument(
         "--version",
         action="store_true",
         required=False,
-        help=f"Enables the printing of the current version of perf_analyzer. "
-        "By default, it is set false.",
+        help=f"Prints the version and exits. By default, it is set false.",
     )
 
 
 def add_endpoint_args(parser):
     parser.add_argument(
-        "--u",
+        "-u",
+        "--url",
         type=str,
         default="localhost:8001",
         required=False,
@@ -200,7 +183,8 @@ def parse_args(argv=None):
     add_model_args(model_group)
 
     profile_group = parser.add_argument_group("Profiling")
-    add_profile_args(profile_group)
+    load_management_group = profile_group.add_mutually_exclusive_group()
+    add_profile_args(profile_group, load_management_group)
 
     endpoint_group = parser.add_argument_group("Endpoint")
     add_endpoint_args(endpoint_group)
@@ -210,25 +194,12 @@ def parse_args(argv=None):
 
     args = parser.parse_args(argv)
 
-    # Concurrency and request rate are mutually exclusive
-    # TODO: Review if there is a cleaner way to do this with argparse
-    if args.concurrency is not None and args.request_rate is not None:
-        parser.error(
-            "Arguments --concurrency and --request_rate are mutually exclusive."
-        )
-
-    if args.concurrency is None and args.request_rate is None:
-        args.concurrency = 1
-        print(
-            "Neither --concurrency nor --request_rate provided. Setting concurrency to 1."
-        )
-
     # Update GenAI-PA non-range attributes to range format for PA
     for attr_key in ["concurrency", "request_rate"]:
         attr_val = getattr(args, attr_key)
         if attr_val is not None:
-            setattr(args, f"{attr_key}_range", f"{attr_val}:{attr_val}:{attr_val}")
+            setattr(args, f"{attr_key}_range", f"{attr_val}:{attr_val}")
         delattr(args, attr_key)
 
-    args = argparse.Namespace(**{k: v for k, v in vars(args).items() if v is not None})
+    args = prune_args(args)
     return args
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py b/src/c++/perf_analyzer/genai-pa/genai_pa/utils.py
@@ -0,0 +1,38 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+from pathlib import Path
+
+
+def file_exists(file: Path) -> bool:
+    """
+    Return True when ``file`` points to an existing regular file.
+
+    ``Path.is_file()`` is already False for paths that do not exist, so no
+    separate existence check is needed. Directories yield False.
+    """
+    return file.is_file()
+
+
+def remove_file(file: Path):
+    """
+    Delete ``file`` when it is an existing regular file; otherwise do nothing.
+    """
+    if not file_exists(file):
+        return
+    file.unlink()
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py b/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
@@ -24,7 +24,6 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-import json
 import logging
 import subprocess
 
@@ -35,19 +34,21 @@
 
 class Profiler:
     @staticmethod
-    def run(model, args):
+    def run(model, args=None):
+        """
+        Build a perf_analyzer command line from the parsed options and run it.
+
+        If ``args`` has a ``version`` attribute, only ``perf_analyzer --version``
+        is run. Otherwise the command starts with ``-m <model> --async`` and
+        each option in ``args`` is appended: ``model`` and ``func`` are
+        skipped, values that are ``True`` become bare ``--<name>`` flags,
+        ``url`` is passed as ``-u <value>``, and everything else becomes
+        ``--<name> <value>``. With ``args=None`` no extra options are added.
+
+        NOTE: values are interpolated into a shell command without quoting.
+        """
         skip_args = ["model", "func"]
-
-        cmd = f"perf_analyzer -m {model} "
-        for arg, value in vars(args).items():
-            if value is True:
-                cmd += f"--{arg} "
-            elif value is False:
-                pass
-            elif arg in skip_args:
-                pass
-            else:
-                cmd += f"--{arg} {value} "
+        if hasattr(args, "version"):
+            cmd = "perf_analyzer --version"
+        else:
+            cmd = f"perf_analyzer -m {model} --async "
+            # args defaults to None; vars(None) would raise TypeError.
+            options = vars(args) if args is not None else {}
+            for arg, value in options.items():
+                if arg in skip_args:
+                    continue
+                if value is True:
+                    cmd += f"--{arg} "
+                # Compare by value: "is" tests object identity, which is not
+                # a reliable string comparison.
+                elif arg == "url":
+                    cmd += f"-u {value} "
+                else:
+                    cmd += f"--{arg} {value} "
 
         logger.info(f"Running Perf Analyzer : '{cmd}'")
         subprocess.run(cmd, shell=True)