triton-inference-server · dyastremsky · Feb 27, 2024 · Feb 27, 2024 · Feb 27, 2024
diff --git a/src/c++/perf_analyzer/genai-pa/README.md b/src/c++/perf_analyzer/genai-pa/README.md
@@ -0,0 +1,30 @@
+# genai-pa
+
+## Installation
+
+### Install from Source
+
+```bash
+pip install .
+```
+
+## Quickstart
+
+```bash
+# Explore the commands
+genai-pa -h
+```
+
+## Examples
+
+```bash
+# Profile an LLM with hard-coded inputs
+genai-pa -m my_llm_model
+```
+
+## Test
+
+```bash
+pip install .
+pytest tests/
+```
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/.gitignore b/src/c++/perf_analyzer/genai-pa/genai_pa/.gitignore
@@ -0,0 +1,2 @@
+*.json
+*.cache
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/__init__.py b/src/c++/perf_analyzer/genai-pa/genai_pa/__init__.py
@@ -0,0 +1,27 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+# Version string of the genai_pa package.
+__version__ = "0.0.1"
diff --git a/src/c++/perf_analyzer/genai-pa/main.py → ...f_analyzer/genai-pa/genai_pa/constants.py b/src/c++/perf_analyzer/genai-pa/main.py → ...f_analyzer/genai-pa/genai_pa/constants.py
@@ -25,10 +25,4 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
-
-import argparse
-import sys
-
-args = parser.parse_args()
-if __name__ == "__main__":
-    sys.exit(0)
+# Name handed to logging.getLogger() by the modules that import this constant,
+# so they all log through the same named logger.
+LOGGER_NAME: str = "genai-pa"
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/main.py b/src/c++/perf_analyzer/genai-pa/genai_pa/main.py
@@ -0,0 +1,58 @@
+#!/usr/bin/env python3
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import logging
+import sys
+
+from genai_pa import parser
+from genai_pa.constants import LOGGER_NAME
+
+# Configure logging at INFO level with a "name - level - message" format,
+# then fetch the logger identified by LOGGER_NAME.
+logging.basicConfig(level=logging.INFO, format="%(name)s - %(levelname)s - %(message)s")
+logger = logging.getLogger(LOGGER_NAME)
+
+
+def run(argv=None):
+    """Parse the command line and invoke the handler attached to the result.
+
+    Kept separate from main() so that exceptions propagate to the caller,
+    which lets tests assert on the exact errors and messages raised.
+
+    Args:
+        argv: Optional argument list, used for testing. Defaults to sys.argv
+            when None.
+    """
+    parsed = parser.parse_args(argv)
+    handle = parsed.func
+    handle(parsed)
+
+
+def main():
+    """Run the CLI and translate the outcome into a process exit code.
+
+    Any Exception raised by run() is logged as a formatted error message
+    rather than surfacing as a traceback.
+
+    Returns:
+        0 when run() completes, 1 when it raises an Exception.
+    """
+    exit_code = 0
+    try:
+        run()
+    except Exception as err:
+        # Interactive use: report the message only, without a traceback.
+        logger.error(f"{err}")
+        exit_code = 1
+
+    return exit_code
+
+
+# Script entry point: exit the process with the status returned by main().
+if __name__ == "__main__":
+    sys.exit(main())
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py b/src/c++/perf_analyzer/genai-pa/genai_pa/parser.py
@@ -0,0 +1,146 @@
+#!/usr/bin/env python3
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import argparse
+import logging
+
+from genai_pa.constants import LOGGER_NAME
+
+# Logger looked up by the LOGGER_NAME constant imported from genai_pa.constants.
+logger = logging.getLogger(LOGGER_NAME)
+
+### Handlers ###
+
+
+# NOTE: Placeholder
+def handler(args):
+    """Profile the model named in the parsed command-line arguments.
+
+    Args:
+        args: Parsed arguments; only ``args.model`` is read here.
+    """
+    # Profiler is imported when the handler runs, not at module import time.
+    from genai_pa.wrapper import Profiler
+
+    model_name = args.model
+    Profiler.run(model=model_name)
+
+
+### Parsers ###
+
+
+def add_model_args(parser):
+    """Register the model selection option on *parser*.
+
+    Adds the required ``-m`` / ``--model`` string option.
+
+    Args:
+        parser: An argparse parser or argument group to add the option to.
+    """
+    parser.add_argument(
+        "-m",
+        "--model",
+        type=str,
+        required=True,
+        # Plain string literal: there are no placeholders to interpolate.
+        help="The name of the model to benchmark.",
+    )
+
+
+def add_profile_args(parser):
+    """Register the profiling options on *parser*.
+
+    All three options are optional integers: ``-b`` / ``--batch-size``
+    (default 1), ``--input-length`` (default 128) and ``--output-length``
+    (default 128).
+
+    Args:
+        parser: An argparse parser or argument group to add the options to.
+    """
+    # (flags, default, help) for each option, in registration order.
+    option_specs = (
+        (
+            ("-b", "--batch-size"),
+            1,
+            "The batch size / concurrency to benchmark. (Default: 1)",
+        ),
+        (
+            ("--input-length",),
+            128,
+            "The input length (tokens) to use for benchmarking LLMs. (Default: 128)",
+        ),
+        (
+            ("--output-length",),
+            128,
+            "The output length (tokens) to use for benchmarking LLMs. (Default: 128)",
+        ),
+    )
+    for flags, default_value, help_text in option_specs:
+        parser.add_argument(
+            *flags,
+            type=int,
+            default=default_value,
+            required=False,
+            help=help_text,
+        )
+
+
+def add_endpoint_args(parser):
+    """Register the endpoint options on *parser*.
+
+    Adds ``--url`` (string, default ``localhost:8001``) and ``--provider``
+    (one of ``triton`` or ``openai``, no default). Both are optional.
+
+    Args:
+        parser: An argparse parser or argument group to add the options to.
+    """
+    url_options = dict(
+        type=str,
+        default="localhost:8001",
+        required=False,
+        help="URL of the endpoint to target for benchmarking.",
+    )
+    provider_options = dict(
+        type=str,
+        choices=["triton", "openai"],
+        required=False,
+        help="Provider format/schema to use for benchmarking.",
+    )
+
+    parser.add_argument("--url", **url_options)
+    parser.add_argument("--provider", **provider_options)
+
+
+def add_dataset_args(parser):
+    """Register the dataset options on *parser*.
+
+    Adds ``--dataset`` (default ``OpenOrca``) and ``--tokenizer`` (default
+    ``auto``). Both are optional strings restricted to a fixed set of choices.
+
+    Args:
+        parser: An argparse parser or argument group to add the options to.
+    """
+    dataset_choices = ["OpenOrca", "cnn_dailymail"]
+    tokenizer_choices = ["auto"]
+
+    parser.add_argument(
+        "--dataset",
+        type=str,
+        default="OpenOrca",
+        choices=dataset_choices,
+        required=False,
+        help="HuggingFace dataset to use for the benchmark.",
+    )
+    parser.add_argument(
+        "--tokenizer",
+        type=str,
+        default="auto",
+        choices=tokenizer_choices,
+        required=False,
+        help="The HuggingFace tokenizer to use to interpret token metrics from final text results",
+    )
+
+
+### Entrypoint ###
+
+
+def parse_args(argv=None):
+    """Build the genai-pa argument parser and parse *argv* with it.
+
+    The returned namespace carries ``func`` set to ``handler`` alongside the
+    options registered by the ``add_*_args`` helpers.
+
+    Args:
+        argv: Optional argument list, used for testing. Defaults to sys.argv
+            when None.
+
+    Returns:
+        The argparse namespace produced by parsing *argv*.
+    """
+    cli = argparse.ArgumentParser(
+        prog="genai-pa",
+        description="CLI to profile LLMs and Generative AI models with PA",
+    )
+    cli.set_defaults(func=handler)
+
+    # Conceptually group args for easier visualization
+    grouped_adders = (
+        ("Model", add_model_args),
+        ("Profiling", add_profile_args),
+        ("Endpoint", add_endpoint_args),
+        ("Dataset", add_dataset_args),
+    )
+    for title, add_args in grouped_adders:
+        add_args(cli.add_argument_group(title))
+
+    return cli.parse_args(argv)
diff --git a/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py b/src/c++/perf_analyzer/genai-pa/genai_pa/wrapper.py
@@ -0,0 +1,56 @@
+# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+import logging
+import subprocess
+
+from genai_pa.constants import LOGGER_NAME
+
+# Logger looked up by the LOGGER_NAME constant imported from genai_pa.constants.
+logger = logging.getLogger(LOGGER_NAME)
+
+
+class Profiler:
+    """Wrapper that launches the ``perf_analyzer`` command-line tool."""
+
+    @staticmethod
+    def run(model):
+        """Write a placeholder input file and run perf_analyzer against *model*.
+
+        Args:
+            model: Name of the model, passed to perf_analyzer via ``-m``.
+
+        Raises:
+            subprocess.CalledProcessError: If perf_analyzer exits with a
+                non-zero status.
+        """
+        # TODO: Replace with other plumbing
+        input_file = "/tmp/input_data.json"
+        with open(input_file, "w") as f:
+            data = {"data": [{"text_input": ["hi"]}]}
+            json.dump(data, f)
+
+        cmd = [
+            "perf_analyzer",
+            "-i",
+            "grpc",
+            "--streaming",
+            "-m",
+            model,
+            "--input-data",
+            input_file,
+        ]
+        logger.info(f"Running Perf Analyzer : '{cmd}'")
+        # check=True turns a failing perf_analyzer run into an exception, so
+        # the failure is reported instead of being silently ignored.
+        subprocess.run(cmd, check=True)