[WIP] LLaVA support #720

Merged · 19 commits · Jul 10, 2024
Changes from 4 commits

170 changes: 170 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py
@@ -12,12 +12,16 @@
# See the License for the specific language governing permissions and
# limitations under the License.

import base64
import json
import os
import random
from copy import deepcopy
from enum import Enum, auto
from io import BytesIO
from pathlib import Path
from typing import Any, Dict, List, Optional, Tuple, cast
from PIL import Image, ImageDraw

import requests
from genai_perf.constants import CNN_DAILY_MAIL, DEFAULT_INPUT_DATA_JSON, OPEN_ORCA
@@ -27,6 +31,49 @@
from requests import Response


def make_a_snowman() -> Image.Image:
# Create a blank image with a sky-blue background
img = Image.new("RGB", (600, 800), color="skyblue")
d = ImageDraw.Draw(img)

# Draw the snowman's body (three circles)
body_color = "white"
d.ellipse([200, 500, 400, 700], fill=body_color, outline="black") # Bottom circle
d.ellipse([225, 350, 375, 550], fill=body_color, outline="black") # Middle circle
d.ellipse([250, 200, 350, 400], fill=body_color, outline="black") # Head circle

# Draw the snowman's eyes
eye_color = "black"
d.ellipse([275, 250, 285, 260], fill=eye_color) # Left eye
d.ellipse([315, 250, 325, 260], fill=eye_color) # Right eye

# Draw the snowman's nose (carrot)
nose_color = "orange"
d.polygon([(300, 270), (300, 280), (340, 275)], fill=nose_color) # Nose

# Draw the snowman's mouth (smile)
mouth_color = "black"
d.arc([275, 290, 325, 310], start=0, end=180, fill=mouth_color) # Smile

# Draw the snowman's buttons
d.ellipse([290, 420, 310, 440], fill=eye_color) # Top button
d.ellipse([290, 460, 310, 480], fill=eye_color) # Middle button
d.ellipse([290, 500, 310, 520], fill=eye_color) # Bottom button

# Draw the snowman's arms
arm_color = "brown"
d.line([225, 450, 150, 400], fill=arm_color, width=5) # Left arm
d.line([375, 450, 450, 400], fill=arm_color, width=5) # Right arm

return img
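
# Note: the fixed synthetic image above lets vision inputs be generated
# without requiring users to supply an image dataset.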


def encode_image(img: Image.Image) -> str:
buffered = BytesIO()
img.save(buffered, format="PNG")
return base64.b64encode(buffered.getvalue()).decode("utf-8")
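
# Example usage (sketch): the two helpers above combine into the base64 data
# URL that _add_vision_input embeds into each prompt:
#
#   snowman_b64 = encode_image(make_a_snowman())
#   image_url = f"data:image/png;base64,{snowman_b64}"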


class ModelSelectionStrategy(Enum):
ROUND_ROBIN = auto()
RANDOM = auto()
@@ -41,6 +88,7 @@
class OutputFormat(Enum):
OPENAI_CHAT_COMPLETIONS = auto()
OPENAI_COMPLETIONS = auto()
OPENAI_VISION = auto()
Review comment:
The response format for chat VLMs is the same as regular chat completions, since we only get text out; why have a separate entry?

Reply (Contributor):
The name of the enum is a bit misleading 😅 OutputFormat is actually not about the format of the response; it describes the format of the input JSON file that LlmInputs generates.
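
For illustration, one generated entry might look like this (a sketch; the prompt text and base64 value are placeholders):

# Sketch of a payload entry written for OutputFormat.OPENAI_VISION.
entry = {
    "messages": [
        {
            "role": "user",
            "content": [
                {"type": "text", "text": "What is in this image?"},
                {
                    "type": "image_url",
                    "image_url": "data:image/png;base64,iVBORw0KGgo...",
                },
            ],
        }
    ]
}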

TENSORRTLLM = auto()
VLLM = auto()

@@ -187,6 +235,12 @@
else:
raise GenAIPerfException("Input source is not recognized.")

if output_format == OutputFormat.OPENAI_VISION:
snowman_image = make_a_snowman()
generic_dataset_json = cls._add_vision_input(
generic_dataset_json, snowman_image
)

if extra_inputs is None:
extra_inputs = {}

@@ -354,6 +408,29 @@
]
return dataset_json

@classmethod
def _add_vision_input(
cls, generic_dataset_json: Dict[str, List[Dict]], img: Image.Image
) -> Dict[str, List[Dict]]:
img_base64 = encode_image(img)
for row in generic_dataset_json["rows"]:
if isinstance(row["text_input"], str):
row["text_input"] = [
dict(
type="text",
text=row["text_input"],
)
]

row["text_input"].append(
dict(
type="image_url",
image_url=f"data:image/png;base64,{img_base64}",
)
)

return generic_dataset_json
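
# Before/after of one row (sketch; prompt text assumed): a plain string
# prompt becomes a list of typed parts with the image appended:
#   "What is in this image?"  ->
#   [{"type": "text", "text": "What is in this image?"},
#    {"type": "image_url", "image_url": "data:image/png;base64,..."}]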

@classmethod
def _get_prompts_from_input_file(cls, input_filename: Path) -> List[str]:
"""
@@ -419,6 +496,18 @@
model_name,
model_selection_strategy,
)
elif output_format == OutputFormat.OPENAI_VISION:
output_json = cls._convert_generic_json_to_openai_vision_format(
generic_dataset,
add_model_name,
add_stream,
extra_inputs,
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)
elif output_format == OutputFormat.VLLM:
output_json = cls._convert_generic_json_to_vllm_format(
generic_dataset,
@@ -485,6 +574,41 @@

return pa_json

@classmethod
def _convert_generic_json_to_openai_vision_format(
cls,
dataset_json: Dict,
add_model_name: bool,
add_stream: bool,
extra_inputs: Dict,
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
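# Note: vision requests reuse the OpenAI chat-completions request structure;
# only the message content differs (a list of text and image_url parts built
# by _add_vision_input).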
(
system_role_headers,
user_role_headers,
text_input_headers,
) = cls._determine_json_feature_roles(dataset_json)
pa_json = cls._populate_openai_vision_output_json(
dataset_json,
system_role_headers,
user_role_headers,
text_input_headers,
add_model_name,
add_stream,
extra_inputs,
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
model_name,
model_selection_strategy,
)

return pa_json

@classmethod
def _convert_generic_json_to_openai_completions_format(
cls,
@@ -684,6 +808,52 @@

return pa_json

@classmethod
def _populate_openai_vision_output_json(
cls,
dataset_json: Dict,
system_role_headers: List[str],
user_role_headers: List[str],
text_input_headers: List[str],
add_model_name: bool,
add_stream: bool,
extra_inputs: Dict,
output_tokens_mean: int,
output_tokens_stddev: int,
output_tokens_deterministic: bool,
model_name: list = [],
model_selection_strategy: ModelSelectionStrategy = ModelSelectionStrategy.ROUND_ROBIN,
) -> Dict:
pa_json = cls._create_empty_openai_pa_json()

for index, entry in enumerate(dataset_json["rows"]):
iter_model_name = cls._select_model_name(
model_name, index, model_selection_strategy
)
pa_json["data"].append({"payload": []})
pa_json["data"][index]["payload"].append({"messages": []})

for header, content in entry.items():
new_message = cls._create_new_openai_chat_completions_message(
header, system_role_headers, user_role_headers, content
)

pa_json = cls._add_new_message_to_json(pa_json, index, new_message)

pa_json = cls._add_optional_tags_to_openai_json(
pa_json,
index,
add_model_name,
add_stream,
extra_inputs,
output_tokens_mean,
output_tokens_stddev,
output_tokens_deterministic,
iter_model_name,
)

return pa_json

@classmethod
def _populate_openai_completions_output_json(
cls,
23 changes: 20 additions & 3 deletions src/c++/perf_analyzer/genai-perf/genai_perf/llm_metrics.py
@@ -36,6 +36,7 @@

import numpy as np
import pandas as pd
from genai_perf.llm_inputs.llm_inputs import OutputFormat
from genai_perf.tokenizer import Tokenizer
from genai_perf.utils import load_json, remove_sse_prefix
from rich.console import Console
@@ -45,6 +46,7 @@
class ResponseFormat(Enum):
OPENAI_CHAT_COMPLETIONS = auto()
OPENAI_COMPLETIONS = auto()
OPENAI_VISION = auto()
TRITON = auto()


@@ -304,8 +306,9 @@ class ProfileDataParser:
extract core metrics and calculate various performance statistics.
"""

def __init__(self, filename: Path) -> None:
def __init__(self, filename: Path, output_format: OutputFormat) -> None:
data = load_json(filename)
self.output_format = output_format
self._get_profile_metadata(data)
self._parse_profile_data(data)

@@ -326,6 +329,8 @@ def _get_profile_metadata(self, data: dict) -> None:
self._response_format = ResponseFormat.OPENAI_CHAT_COMPLETIONS
elif "text_completion" in response:
self._response_format = ResponseFormat.OPENAI_COMPLETIONS
elif self.output_format == OutputFormat.OPENAI_VISION:
self._response_format = ResponseFormat.OPENAI_VISION
else:
raise RuntimeError("Unknown OpenAI response format.")

@@ -392,9 +397,10 @@ def __init__(
self,
filename: Path,
tokenizer: Tokenizer,
output_format: OutputFormat,
) -> None:
self._tokenizer = tokenizer
super().__init__(filename)
super().__init__(filename, output_format)

def _parse_requests(self, requests: dict) -> LLMMetrics:
"""Parse each requests in profile export data to extract key metrics."""
@@ -539,6 +545,11 @@ def _get_openai_input_text(self, req_inputs: dict) -> str:
payload = json.loads(req_inputs["payload"])
if self._response_format == ResponseFormat.OPENAI_CHAT_COMPLETIONS:
return payload["messages"][0]["content"]
elif self._response_format == ResponseFormat.OPENAI_VISION:
content = payload["messages"][0]["content"]
if isinstance(content, str):
content = [dict(type="text", text=content)]
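# Keep only the typed "text" parts of the multimodal content; the base64
# image part is skipped.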
return " ".join(c["text"] for c in content if c["type"] == "text")
elif self._response_format == ResponseFormat.OPENAI_COMPLETIONS:
return payload["prompt"]
else:
@@ -599,7 +610,13 @@ def _extract_openai_text_output(self, response: str) -> str:
# FIXME: TPA-47 workaround for vLLM not following OpenAI Completions
# API specification when streaming, missing 'object' field:
# https://platform.openai.com/docs/api-reference/completions
text_output = completions.get("text", "")
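# With the "object" field missing, the body may be a non-streaming response
# (carrying "message") or a streaming chunk (carrying "delta"); handle both.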
if "message" in completions:
output = completions["message"]
elif "delta" in completions:
output = completions["delta"]
else:
raise ValueError("Unknown OpenAI response with object type unspecified")
text_output = output.get("content", "")
elif data["object"] == "text_completion": # legacy
text_output = completions.get("text", "")
elif data["object"] == "chat.completion": # non-streaming
3 changes: 2 additions & 1 deletion src/c++/perf_analyzer/genai-perf/genai_perf/main.py
@@ -87,6 +87,7 @@ def calculate_metrics(args: Namespace, tokenizer: Tokenizer) -> LLMProfileDataPa
return LLMProfileDataParser(
filename=args.profile_export_file,
tokenizer=tokenizer,
output_format=args.output_format,
)


@@ -115,7 +116,7 @@ def create_plots(args: Namespace) -> None:
output_dir=plot_dir,
)
config_parser = PlotConfigParser(plot_dir / "config.yaml")
plot_configs = config_parser.generate_configs()
plot_configs = config_parser.generate_configs(args.output_format)
plot_manager = PlotManager(plot_configs)
plot_manager.generate_plots()

6 changes: 4 additions & 2 deletions src/c++/perf_analyzer/genai-perf/genai_perf/parser.py
@@ -110,6 +110,8 @@ def _check_conditional_args(
args.output_format = OutputFormat.OPENAI_CHAT_COMPLETIONS
elif args.endpoint_type == "completions":
args.output_format = OutputFormat.OPENAI_COMPLETIONS
elif args.endpoint_type == "vision":
args.output_format = OutputFormat.OPENAI_VISION

if args.endpoint is not None:
args.endpoint = args.endpoint.lstrip(" /")
@@ -404,7 +406,7 @@ def _add_endpoint_args(parser):
endpoint_group.add_argument(
"--endpoint-type",
type=str,
choices=["chat", "completions"],
choices=["chat", "completions", "vision"],
required=False,
help=f"The endpoint-type to send requests to on the "
'server. This is only used with the "openai" service-kind.',
@@ -586,7 +588,7 @@ def compare_handler(args: argparse.Namespace):
args.config = output_dir / "config.yaml"

config_parser = PlotConfigParser(args.config)
plot_configs = config_parser.generate_configs()
plot_configs = config_parser.generate_configs(args.output_format)
plot_manager = PlotManager(plot_configs)
plot_manager.generate_plots()

@@ -33,6 +33,7 @@
# Skip type checking to avoid mypy error
# Issue: https://github.com/python/mypy/issues/10632
import yaml # type: ignore
from genai_perf.llm_inputs.llm_inputs import OutputFormat
from genai_perf.llm_metrics import LLMProfileDataParser, Statistics
from genai_perf.plots.plot_config import PlotConfig, PlotType, ProfileRunData
from genai_perf.tokenizer import DEFAULT_TOKENIZER, get_tokenizer
@@ -47,7 +48,7 @@ class PlotConfigParser:
def __init__(self, filename: Path) -> None:
self._filename = filename

def generate_configs(self) -> List[PlotConfig]:
def generate_configs(self, output_format: OutputFormat) -> List[PlotConfig]:
"""Load YAML configuration file and convert to PlotConfigs."""
logger.info(
f"Generating plot configurations by parsing {self._filename}. "
@@ -60,7 +61,7 @@ def generate_configs(self) -> List[PlotConfig]:
# Collect profile run data
profile_data: List[ProfileRunData] = []
for filepath in config["paths"]:
stats = self._get_statistics(filepath)
stats = self._get_statistics(filepath, output_format)
profile_data.append(
ProfileRunData(
name=self._get_run_name(Path(filepath)),
@@ -84,11 +85,12 @@

return plot_configs

def _get_statistics(self, filepath: str) -> Statistics:
def _get_statistics(self, filepath: str, output_format: OutputFormat) -> Statistics:
"""Extract a single profile run data."""
data_parser = LLMProfileDataParser(
filename=Path(filepath),
tokenizer=get_tokenizer(DEFAULT_TOKENIZER),
output_format=output_format,
)
load_info = data_parser.get_profile_load_info()

@@ -22,6 +22,7 @@
base_commands = {
"nim_chat": "genai-perf -s 999 -p 20000 -m llama-2-7b-chat -u http://localhost:9999 --service-kind openai --endpoint-type chat",
"nim_completions": "genai-perf -s 999 -p 20000 -m llama-2-7b -u http://localhost:9999 --service-kind openai --endpoint-type completions",
"nim_vision": "genai-perf -s 999 -p 20000 -m llava16-mistral-7b -u http://localhost:9999 --service-kind openai --endpoint-type vision",
"vllm_openai": "genai-perf -s 999 -p 20000 -m mistralai/Mistral-7B-v0.1 --service-kind openai --endpoint-type chat",
"triton_tensorrtllm": "genai-perf -s 999 -p 20000 -m llama-2-7b -u 0.0.0.0:9999 --service-kind triton --backend tensorrtllm",
"triton_vllm": "genai-perf -s 999 -p 20000 -m gpt2_vllm --service-kind triton --backend vllm",