Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] LLaVA support #720

Merged
merged 19 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import base64
import json
import random
from copy import deepcopy
from enum import Enum, auto
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, cast

Expand All @@ -24,9 +26,53 @@
from genai_perf.exceptions import GenAIPerfException
from genai_perf.llm_inputs.synthetic_prompt_generator import SyntheticPromptGenerator
from genai_perf.tokenizer import DEFAULT_TOKENIZER, Tokenizer, get_tokenizer
from PIL import Image, ImageDraw
from requests import Response


# (TMA-1984) Remove the dummy image input with random noise image
def make_snowman_image():
    """Return a 600x800 PIL image of a cartoon snowman on a sky-blue background."""
    canvas = Image.new("RGB", (600, 800), color="skyblue")
    draw = ImageDraw.Draw(canvas)

    # Body: three stacked white circles, drawn bottom-up so each one
    # overlaps the circle beneath it.
    body_circles = (
        [200, 500, 400, 700],  # bottom
        [225, 350, 375, 550],  # middle
        [250, 200, 350, 400],  # head
    )
    for bbox in body_circles:
        draw.ellipse(bbox, fill="white", outline="black")

    # Eyes: two small black dots on the head.
    draw.ellipse([275, 250, 285, 260], fill="black")  # left
    draw.ellipse([315, 250, 325, 260], fill="black")  # right

    # Carrot nose pointing to the right.
    draw.polygon([(300, 270), (300, 280), (340, 275)], fill="orange")

    # Smile: lower half-arc under the nose.
    draw.arc([275, 290, 325, 310], start=0, end=180, fill="black")

    # Three buttons spaced evenly down the middle circle.
    for button_top in (420, 460, 500):
        draw.ellipse([290, button_top, 310, button_top + 20], fill="black")

    # Stick arms angled upward from the middle circle.
    draw.line([225, 450, 150, 400], fill="brown", width=5)  # left
    draw.line([375, 450, 450, 400], fill="brown", width=5)  # right

    return canvas


class ImageFormat(Enum):
    """Image encodings supported for generated image inputs."""

    PNG = 1
    JPEG = 2


class ModelSelectionStrategy(Enum):
ROUND_ROBIN = auto()
RANDOM = auto()
Expand All @@ -42,6 +88,7 @@ class OutputFormat(Enum):
OPENAI_CHAT_COMPLETIONS = auto()
OPENAI_COMPLETIONS = auto()
OPENAI_EMBEDDINGS = auto()
OPENAI_VISION = auto()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The response format for chat VLMs is the same as the regular chat completion since we just have text out, why have a separate entry?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of the enum is a bit misleading 😅 The OutputFormat enum is actually not about the format of the response; it describes the format of the input JSON file generated by LlmInputs.

RANKINGS = auto()
TENSORRTLLM = auto()
VLLM = auto()
Expand Down Expand Up @@ -308,6 +355,12 @@ def get_generic_dataset_json(
else:
raise GenAIPerfException("Input source is not recognized.")

if output_format == OutputFormat.OPENAI_VISION:
snowman_image = make_snowman_image()
generic_dataset_json = cls._add_images_to_generic_json(
generic_dataset_json, snowman_image
)

return generic_dataset_json

@classmethod
Expand Down Expand Up @@ -544,6 +597,35 @@ def verify_file(cls, input_filename: Path) -> None:
if not input_filename.exists():
raise FileNotFoundError(f"The file '{input_filename}' does not exist.")

@classmethod
def _add_images_to_generic_json(
    cls, generic_dataset_json: Dict[str, List[Dict]], img: Image
) -> Dict[str, List[Dict]]:
    """Attach a base64-encoded copy of *img* to every row of the dataset.

    Each row whose ``text_input`` is still a plain string is rewritten into
    the OpenAI vision content-list format: the original text part followed
    by an ``image_url`` part carrying the image as a base64 data URL.

    Args:
        generic_dataset_json: Dataset dict with a top-level ``rows`` list.
        img: PIL image to embed into every row.

    Returns:
        The same dataset dict, mutated in place.
    """
    # (TMA-1985) Support multiple image formats
    img_format = ImageFormat.PNG
    img_base64 = cls._encode_image(img, img_format)
    # Derive the MIME subtype from the chosen format instead of hard-coding
    # "png", so the data URL stays consistent if img_format above changes.
    mime_subtype = img_format.name.lower()
    for row in generic_dataset_json["rows"]:
        if isinstance(row["text_input"], str):
            row["text_input"] = [
                {
                    "type": "text",
                    "text": row["text_input"],
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/{mime_subtype};base64,{img_base64}"
                    },
                },
            ]

    return generic_dataset_json

@classmethod
def _encode_image(cls, img: Image, format=ImageFormat.PNG):
    """Serialize *img* in the given format and return it as a base64 string."""
    with BytesIO() as buffer:
        img.save(buffer, format=format.name)
        raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")

@classmethod
def _convert_generic_json_to_output_format(
cls,
Expand All @@ -558,7 +640,10 @@ def _convert_generic_json_to_output_format(
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
if (
output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS
or output_format == OutputFormat.OPENAI_VISION
):
output_json = cls._convert_generic_json_to_openai_chat_completions_format(
generic_dataset,
add_model_name,
Expand Down
8 changes: 7 additions & 1 deletion src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def to_lowercase(self):
"completions": "v1/completions",
"embeddings": "v1/embeddings",
"rankings": "v1/ranking",
"vision": "v1/chat/completions",
}


Expand Down Expand Up @@ -131,6 +132,11 @@ def _check_conditional_args(
elif args.endpoint_type == "rankings":
args.output_format = OutputFormat.RANKINGS

# (TMA-1986) deduce vision format from chat completions + image CLI
# because there's no openai vision endpoint.
elif args.endpoint_type == "vision":
args.output_format = OutputFormat.OPENAI_VISION

if args.endpoint is not None:
args.endpoint = args.endpoint.lstrip(" /")
else:
Expand Down Expand Up @@ -492,7 +498,7 @@ def _add_endpoint_args(parser):
endpoint_group.add_argument(
"--endpoint-type",
type=str,
choices=["chat", "completions", "embeddings", "rankings"],
choices=["chat", "completions", "embeddings", "rankings", "vision"],
required=False,
help=f"The endpoint-type to send requests to on the "
'server. This is only used with the "openai" service-kind.',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ def _get_openai_input_text(self, req_inputs: dict) -> str:
return payload["messages"][0]["content"]
elif self._response_format == ResponseFormat.OPENAI_COMPLETIONS:
return payload["prompt"]
elif self._response_format == ResponseFormat.OPENAI_VISION:
content = payload["messages"][0]["content"]
return " ".join(c["text"] for c in content if c["type"] == "text")
else:
raise ValueError(
"Failed to parse OpenAI request input in profile export file."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ResponseFormat(Enum):
OPENAI_CHAT_COMPLETIONS = auto()
OPENAI_COMPLETIONS = auto()
OPENAI_EMBEDDINGS = auto()
OPENAI_VISION = auto()
RANKINGS = auto()
TRITON = auto()

Expand All @@ -59,21 +60,33 @@ def _get_profile_metadata(self, data: dict) -> None:
if data["endpoint"] == "rerank":
self._response_format = ResponseFormat.HUGGINGFACE_RANKINGS
elif data["endpoint"] == "v1/chat/completions":
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
# (TPA-66) add PA metadata to deduce the response format instead
# of parsing the request input payload in profile export json
# file.
request = data["experiments"][0]["requests"][0]
request_input = request["request_inputs"]["payload"]
if "image_url" in request_input:
self._response_format = ResponseFormat.OPENAI_VISION
else:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
elif data["endpoint"] == "v1/completions":
self._response_format = ResponseFormat.OPENAI_COMPLETIONS
elif data["endpoint"] == "v1/embeddings":
self._response_format = ResponseFormat.OPENAI_EMBEDDINGS
elif data["endpoint"] == "v1/ranking":
self._response_format = ResponseFormat.RANKINGS
else:
# TPA-66: add PA metadata to handle this case
# (TPA-66) add PA metadata to handle this case
# When endpoint field is either empty or custom endpoint, fall
# back to parsing the response to extract the response format.
request = data["experiments"][0]["requests"][0]
request_input = request["request_inputs"]["payload"]
response = request["response_outputs"][0]["response"]
if "chat.completion" in response:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
if "image_url" in request_input:
self._response_format = ResponseFormat.OPENAI_VISION
else:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
elif "text_completion" in response:
self._response_format = ResponseFormat.OPENAI_COMPLETIONS
elif "embedding" in response:
Expand Down
92 changes: 0 additions & 92 deletions src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py

This file was deleted.

1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies = [
"pytest-mock",
"pyyaml",
"responses",
"pillow",
]

# CLI Entrypoint
Expand Down
38 changes: 38 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ModelSelectionStrategy,
OutputFormat,
PromptSource,
make_snowman_image,
)
from genai_perf.tokenizer import Tokenizer

Expand Down Expand Up @@ -78,6 +79,7 @@ class TestLlmInputs:
("triton", "tensorrtllm", OutputFormat.TENSORRTLLM),
("openai", "v1/completions", OutputFormat.OPENAI_COMPLETIONS),
("openai", "v1/chat/completions", OutputFormat.OPENAI_CHAT_COMPLETIONS),
("openai", "v1/chat/completions", OutputFormat.OPENAI_VISION),
]

@pytest.fixture
Expand Down Expand Up @@ -550,6 +552,42 @@ def test_llm_inputs_with_defaults(self, default_configured_url):
# else:
# assert False, f"Unsupported output format: {output_format}"

def test_add_image_inputs_openai_vision(self) -> None:
    """String prompts are converted into OpenAI vision content lists."""
    generic_json = {
        "rows": [
            {"text_input": "test input one"},
            {"text_input": "test input two"},
        ]
    }
    img = make_snowman_image()
    encoded_img = LlmInputs._encode_image(img)

    generic_json = LlmInputs._add_images_to_generic_json(generic_json, img)

    expected_image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{encoded_img}"},
    }
    expected_texts = ["test input one", "test input two"]
    for row, expected_text in zip(generic_json["rows"], expected_texts):
        assert row["text_input"] == [
            {
                "type": "text",
                "text": expected_text,
            },
            expected_image_part,
        ]

# def test_trtllm_default_max_tokens(self, default_tokenizer: Tokenizer) -> None:
# input_name = "max_tokens"
# input_value = 256
Expand Down
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_llm_metric_system_metrics(self) -> None:
output_sequence_lengths=[3, 4],
input_sequence_lengths=[12, 34],
)

sys_metrics = m.system_metrics
assert len(sys_metrics) == 2
assert sys_metrics[0].name == "output_token_throughput"
Expand Down
Loading
Loading