Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[WIP] LLaVA support #720

Merged
merged 19 commits into from
Jul 10, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -12,10 +12,12 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import base64
import json
import random
from copy import deepcopy
from enum import Enum, auto
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, cast

Expand All @@ -24,9 +26,53 @@
from genai_perf.exceptions import GenAIPerfException
from genai_perf.llm_inputs.synthetic_prompt_generator import SyntheticPromptGenerator
from genai_perf.tokenizer import DEFAULT_TOKENIZER, Tokenizer, get_tokenizer
from PIL import Image, ImageDraw
from requests import Response


# (TMA-1984) Remove the dummy image input with random noise image
def make_snowman_image():
    """Return a 600x800 PIL image of a cartoon snowman on a sky-blue background."""
    canvas = Image.new("RGB", (600, 800), color="skyblue")
    draw = ImageDraw.Draw(canvas)

    # Body: three stacked white circles, drawn bottom-up so each one
    # overlaps the circle beneath it.
    body_circles = (
        [200, 500, 400, 700],  # bottom
        [225, 350, 375, 550],  # middle
        [250, 200, 350, 400],  # head
    )
    for bbox in body_circles:
        draw.ellipse(bbox, fill="white", outline="black")

    # Eyes: two small black dots on the head.
    draw.ellipse([275, 250, 285, 260], fill="black")  # left
    draw.ellipse([315, 250, 325, 260], fill="black")  # right

    # Carrot nose pointing to the right.
    draw.polygon([(300, 270), (300, 280), (340, 275)], fill="orange")

    # Smile: lower half-arc under the nose.
    draw.arc([275, 290, 325, 310], start=0, end=180, fill="black")

    # Three buttons spaced evenly down the middle circle.
    for button_top in (420, 460, 500):
        draw.ellipse([290, button_top, 310, button_top + 20], fill="black")

    # Stick arms angled upward from the middle circle.
    draw.line([225, 450, 150, 400], fill="brown", width=5)  # left
    draw.line([375, 450, 450, 400], fill="brown", width=5)  # right

    return canvas


class ImageFormat(Enum):
    """Image encodings supported for generated image inputs."""

    PNG = 1
    JPEG = 2


class ModelSelectionStrategy(Enum):
ROUND_ROBIN = auto()
RANDOM = auto()
Expand All @@ -42,6 +88,7 @@ class OutputFormat(Enum):
OPENAI_CHAT_COMPLETIONS = auto()
OPENAI_COMPLETIONS = auto()
OPENAI_EMBEDDINGS = auto()
OPENAI_VISION = auto()
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The response format for chat VLMs is the same as the regular chat completion since we just have text out, why have a separate entry?

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

The name of the enum is a bit misleading 😅 The OutputFormat enum is actually not about the format of the response; it describes the format of the input JSON file generated by LlmInputs.

RANKINGS = auto()
TENSORRTLLM = auto()
VLLM = auto()
Expand Down Expand Up @@ -308,6 +355,12 @@ def get_generic_dataset_json(
else:
raise GenAIPerfException("Input source is not recognized.")

if output_format == OutputFormat.OPENAI_VISION:
snowman_image = make_snowman_image()
generic_dataset_json = cls._add_images_to_generic_json(
generic_dataset_json, snowman_image
)

return generic_dataset_json

@classmethod
Expand Down Expand Up @@ -544,6 +597,35 @@ def verify_file(cls, input_filename: Path) -> None:
if not input_filename.exists():
raise FileNotFoundError(f"The file '{input_filename}' does not exist.")

@classmethod
def _add_images_to_generic_json(
    cls, generic_dataset_json: Dict[str, List[Dict]], img: Image
) -> Dict[str, List[Dict]]:
    """Attach a base64-encoded copy of *img* to every row of the dataset.

    Each row whose ``text_input`` is still a plain string is rewritten into
    the OpenAI vision content-list format: the original text part followed
    by an ``image_url`` part carrying the image as a base64 data URL.

    Args:
        generic_dataset_json: Dataset dict with a top-level ``rows`` list.
        img: PIL image to embed into every row.

    Returns:
        The same dataset dict, mutated in place.
    """
    # (TMA-1985) Support multiple image formats
    img_format = ImageFormat.PNG
    img_base64 = cls._encode_image(img, img_format)
    # Derive the MIME subtype from the chosen format instead of hard-coding
    # "png", so the data URL stays consistent if img_format above changes.
    mime_subtype = img_format.name.lower()
    for row in generic_dataset_json["rows"]:
        if isinstance(row["text_input"], str):
            row["text_input"] = [
                {
                    "type": "text",
                    "text": row["text_input"],
                },
                {
                    "type": "image_url",
                    "image_url": {
                        "url": f"data:image/{mime_subtype};base64,{img_base64}"
                    },
                },
            ]

    return generic_dataset_json

@classmethod
def _encode_image(cls, img: Image, format=ImageFormat.PNG):
    """Serialize *img* in the given format and return it as a base64 string."""
    with BytesIO() as buffer:
        img.save(buffer, format=format.name)
        raw_bytes = buffer.getvalue()
    return base64.b64encode(raw_bytes).decode("utf-8")

@classmethod
def _convert_generic_json_to_output_format(
cls,
Expand All @@ -558,7 +640,10 @@ def _convert_generic_json_to_output_format(
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
if output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS:
if (
output_format == OutputFormat.OPENAI_CHAT_COMPLETIONS
or output_format == OutputFormat.OPENAI_VISION
):
output_json = cls._convert_generic_json_to_openai_chat_completions_format(
generic_dataset,
add_model_name,
Expand Down
8 changes: 7 additions & 1 deletion src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
Original file line number Diff line number Diff line change
Expand Up @@ -68,6 +68,7 @@ def to_lowercase(self):
"completions": "v1/completions",
"embeddings": "v1/embeddings",
"rankings": "v1/ranking",
"vision": "v1/chat/completions",
}


Expand Down Expand Up @@ -131,6 +132,11 @@ def _check_conditional_args(
elif args.endpoint_type == "rankings":
args.output_format = OutputFormat.RANKINGS

# (TMA-1986) deduce vision format from chat completions + image CLI
# because there's no openai vision endpoint.
elif args.endpoint_type == "vision":
args.output_format = OutputFormat.OPENAI_VISION

if args.endpoint is not None:
args.endpoint = args.endpoint.lstrip(" /")
else:
Expand Down Expand Up @@ -492,7 +498,7 @@ def _add_endpoint_args(parser):
endpoint_group.add_argument(
"--endpoint-type",
type=str,
choices=["chat", "completions", "embeddings", "rankings"],
choices=["chat", "completions", "embeddings", "rankings", "vision"],
required=False,
help=f"The endpoint-type to send requests to on the "
'server. This is only used with the "openai" service-kind.',
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -218,6 +218,9 @@ def _get_openai_input_text(self, req_inputs: dict) -> str:
return payload["messages"][0]["content"]
elif self._response_format == ResponseFormat.OPENAI_COMPLETIONS:
return payload["prompt"]
elif self._response_format == ResponseFormat.OPENAI_VISION:
content = payload["messages"][0]["content"]
return " ".join(c["text"] for c in content if c["type"] == "text")
else:
raise ValueError(
"Failed to parse OpenAI request input in profile export file."
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@ class ResponseFormat(Enum):
OPENAI_CHAT_COMPLETIONS = auto()
OPENAI_COMPLETIONS = auto()
OPENAI_EMBEDDINGS = auto()
OPENAI_VISION = auto()
RANKINGS = auto()
TRITON = auto()

Expand All @@ -59,21 +60,33 @@ def _get_profile_metadata(self, data: dict) -> None:
if data["endpoint"] == "rerank":
self._response_format = ResponseFormat.HUGGINGFACE_RANKINGS
elif data["endpoint"] == "v1/chat/completions":
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
# (TPA-66) add PA metadata to deduce the response format instead
# of parsing the request input payload in profile export json
# file.
request = data["experiments"][0]["requests"][0]
request_input = request["request_inputs"]["payload"]
if "image_url" in request_input:
self._response_format = ResponseFormat.OPENAI_VISION
else:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
elif data["endpoint"] == "v1/completions":
self._response_format = ResponseFormat.OPENAI_COMPLETIONS
elif data["endpoint"] == "v1/embeddings":
self._response_format = ResponseFormat.OPENAI_EMBEDDINGS
elif data["endpoint"] == "v1/ranking":
self._response_format = ResponseFormat.RANKINGS
else:
# TPA-66: add PA metadata to handle this case
# (TPA-66) add PA metadata to handle this case
# When endpoint field is either empty or custom endpoint, fall
# back to parsing the response to extract the response format.
request = data["experiments"][0]["requests"][0]
request_input = request["request_inputs"]["payload"]
response = request["response_outputs"][0]["response"]
if "chat.completion" in response:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
if "image_url" in request_input:
self._response_format = ResponseFormat.OPENAI_VISION
else:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
elif "text_completion" in response:
self._response_format = ResponseFormat.OPENAI_COMPLETIONS
elif "embedding" in response:
Expand Down
92 changes: 0 additions & 92 deletions src/c++/perf_analyzer/genai-perf/genai_perf/test_end_to_end.py

This file was deleted.

1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@ dependencies = [
"pytest-mock",
"pyyaml",
"responses",
"pillow",
]

# CLI Entrypoint
Expand Down
38 changes: 38 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_inputs.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
ModelSelectionStrategy,
OutputFormat,
PromptSource,
make_snowman_image,
)
from genai_perf.tokenizer import Tokenizer

Expand Down Expand Up @@ -78,6 +79,7 @@ class TestLlmInputs:
("triton", "tensorrtllm", OutputFormat.TENSORRTLLM),
("openai", "v1/completions", OutputFormat.OPENAI_COMPLETIONS),
("openai", "v1/chat/completions", OutputFormat.OPENAI_CHAT_COMPLETIONS),
("openai", "v1/chat/completions", OutputFormat.OPENAI_VISION),
]

@pytest.fixture
Expand Down Expand Up @@ -550,6 +552,42 @@ def test_llm_inputs_with_defaults(self, default_configured_url):
# else:
# assert False, f"Unsupported output format: {output_format}"

def test_add_image_inputs_openai_vision(self) -> None:
    """String prompts are converted into OpenAI vision content lists."""
    generic_json = {
        "rows": [
            {"text_input": "test input one"},
            {"text_input": "test input two"},
        ]
    }
    img = make_snowman_image()
    encoded_img = LlmInputs._encode_image(img)

    generic_json = LlmInputs._add_images_to_generic_json(generic_json, img)

    expected_image_part = {
        "type": "image_url",
        "image_url": {"url": f"data:image/png;base64,{encoded_img}"},
    }
    expected_texts = ["test input one", "test input two"]
    for row, expected_text in zip(generic_json["rows"], expected_texts):
        assert row["text_input"] == [
            {
                "type": "text",
                "text": expected_text,
            },
            expected_image_part,
        ]

# def test_trtllm_default_max_tokens(self, default_tokenizer: Tokenizer) -> None:
# input_name = "max_tokens"
# input_value = 256
Expand Down
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_llm_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,7 @@ def test_llm_metric_system_metrics(self) -> None:
output_sequence_lengths=[3, 4],
input_sequence_lengths=[12, 34],
)

sys_metrics = m.system_metrics
assert len(sys_metrics) == 2
assert sys_metrics[0].name == "output_token_throughput"
Expand Down
Loading
Loading