Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

NVCF assets support #755

Closed
wants to merge 2 commits into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,105 @@
import base64
import json
import time
from pathlib import Path

Check notice

Code scanning / CodeQL

Unused import Note

Import of 'Path' is not used.
from typing import Dict

import genai_perf.logging as logging

Check notice

Code scanning / CodeQL

Unused import Note

Import of 'logging' is not used.
import requests


def greedy_fill(size_limit, sizes):
    """Select item indices, largest first, whose sizes fit in size_limit.

    Returns the chosen indices in the order they were taken (by
    decreasing size); ties keep their original relative order.
    """
    order = sorted(range(len(sizes)), key=lambda i: sizes[i], reverse=True)
    budget = size_limit
    chosen = []
    for idx in order:
        if sizes[idx] <= budget:
            budget -= sizes[idx]
            chosen.append(idx)
    return chosen


NVCF_URL = "https://api.nvcf.nvidia.com/v2/nvcf"


class NvcfUploader:
    """Uploads oversized dataset images to NVCF asset storage.

    For each dataset row whose serialized size exceeds the configured
    threshold, the largest image entries that do not fit the per-row
    budget are uploaded and their inline base64 URLs are rewritten to
    NVCF asset references.
    """

    def __init__(self, threshold_kbytes: int, nvcf_api_key: str):
        """
        Args:
            threshold_kbytes: soft per-row payload limit, in kilobytes
                (1 kbyte == 1000 bytes, see upload_large_assets).
            nvcf_api_key: bearer token used for NVCF API requests.
        """
        self.threshold_kbytes = threshold_kbytes
        # Maps uploaded asset_id -> upload duration in seconds.
        self._upload_report: Dict[str, float] = {}
        self._initialize_headers(nvcf_api_key)

    def _initialize_headers(self, nvcf_api_key: str) -> None:
        self._headers = {
            "Authorization": f"Bearer {nvcf_api_key}",
            "accept": "application/json",
            "content-type": "application/json",
        }

    def _add_upload_report_entry(self, asset_id: str, time_delta: float) -> None:
        self._upload_report[asset_id] = time_delta

    def get_upload_report(self) -> Dict[str, float]:
        """Return a copy of {asset_id: upload seconds} for uploads so far."""
        return self._upload_report.copy()

    def upload_large_assets(self, dataset: Dict) -> Dict:
        """Upload images that do not fit each row's payload budget.

        Mutates ``dataset`` in place (image URLs are rewritten to asset
        references) and returns it.
        """
        for row in dataset["rows"]:
            sizes, entries = self._find_uploadable(row)
            # Size of everything that cannot be offloaded (text, structure).
            non_uploadable = self._calculate_data_size(row) - sum(sizes)
            payload_limit = max(0, self.threshold_kbytes * 1000 - non_uploadable)
            # Keep the largest images that still fit inline; upload the rest.
            take = greedy_fill(payload_limit, sizes)
            upload = set(range(len(entries))) - set(take)
            for entry in (entries[i] for i in upload):
                self._upload_image(entry)
        return dataset

    def _calculate_data_size(self, data) -> int:
        # Approximate payload size by the length of the JSON serialization.
        return len(json.dumps(data))

    def _find_uploadable(self, row):
        """Return parallel sequences (sizes, entries) of image entries in a row."""
        found = zip(
            *(
                (self._calculate_data_size(entry), entry)
                for entry in row.get("text_input", {})
                if "image_url" in entry
            )
        )
        found = list(found)
        if not found:
            return [], []
        else:
            return found

    def _decode_base64_img_url(self, data: str):
        """Split a ``data:image/<fmt>;base64,<payload>`` URL into (fmt, raw bytes)."""
        prefix, payload = data.split(";")
        _, img_format = prefix.split("/")
        _, img_base64 = payload.split(",")
        img = base64.b64decode(img_base64)
        return img_format, img

    def _upload_image_to_nvcf(self, data: bytes, img_format: str) -> str:
        """Create an NVCF asset, PUT the image bytes to it, return the asset id."""
        # Named `asset_metadata` (was `json`) to avoid shadowing the
        # imported json module.
        asset_metadata = {
            "contentType": f"image/{img_format}",
            "description": "GenAI-perf synthetic image",
        }
        new_asset_resp = requests.post(
            f"{NVCF_URL}/assets", headers=self._headers, json=asset_metadata
        ).json()
        upload_headers = {
            "Content-Type": asset_metadata["contentType"],
            "x-amz-meta-nvcf-asset-description": asset_metadata["description"],
        }
        upload_resp = requests.put(
            new_asset_resp["uploadUrl"], headers=upload_headers, data=data
        )
        print(
            f"Uploaded asset {new_asset_resp['assetId']} with status {upload_resp.status_code}"
        )
        return new_asset_resp["assetId"]

    def _upload_image(self, data: Dict) -> None:
        """Upload one image entry and rewrite its URL to an asset reference."""
        img_format, img = self._decode_base64_img_url(data["image_url"]["url"])

        start_time = time.perf_counter()
        asset_id = self._upload_image_to_nvcf(img, img_format)
        data["image_url"]["url"] = f"data:image/{img_format};asset_id,{asset_id}"
        end_time = time.perf_counter()
        self._add_upload_report_entry(asset_id, end_time - start_time)
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ requires-python = ">=3.8,<4"
dependencies = [
"numpy<2",
"pytest",
"requests-mock",
"rich",
"transformers",
"plotly",
Expand Down
180 changes: 180 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,180 @@
import base64
import json

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'json' is not used.
from io import BytesIO

import pytest
from genai_perf.llm_inputs.nvcf_assets import NvcfUploader
from PIL import Image


def test_not_upload_text():
    """A text-only dataset must pass through untouched, even at threshold 0."""
    api_key = "test api key"
    dataset = {
        "rows": [{"text_input": "small message"}],
    }
    uploader = NvcfUploader(threshold_kbytes=0, nvcf_api_key=api_key)

    result = uploader.upload_large_assets(dataset)

    assert (
        result == dataset
    ), "There is no row to upload - dataset should stay unchanged"


def generate_image(approx_kbytes):
    """Build a base64 data-URL of a white BMP of roughly approx_kbytes."""
    base64_ratio = 4 / 3  # base64 inflates binary data by about 33%
    bytes_per_pixel = 3  # RGB
    target_pixels = approx_kbytes * 1000 / bytes_per_pixel / base64_ratio
    side = int(target_pixels**0.5)
    image = Image.new("RGB", (side, side), color="white")
    buffer = BytesIO()
    image.save(buffer, format="BMP")  # BMP doesn't compress
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/bmp;base64,{encoded}"


def test_threshold_applies_for_each_row_independently(requests_mock):
    """Each row is measured against the threshold on its own, not combined."""
    api_key = "test api key"
    big_image = generate_image(approx_kbytes=300)
    small_image = generate_image(approx_kbytes=200)
    dataset = {
        "rows": [
            {"text_input": [{"image_url": {"url": big_image}}]},
            {"text_input": [{"image_url": {"url": small_image}}]},
        ]
    }

    uploader = NvcfUploader(threshold_kbytes=400, nvcf_api_key=api_key)

    result = uploader.upload_large_assets(dataset)

    first_row, second_row = result["rows"]
    assert (
        first_row["text_input"][0]["image_url"]["url"] == big_image
    ), "300kb asset should not be uploaded"
    assert (
        second_row["text_input"][0]["image_url"]["url"] == small_image
    ), "200kb asset should not be uploaded"


@pytest.mark.parametrize(
    "threshold_kbytes",
    [100, 400],
)
def test_upload_images(requests_mock, threshold_kbytes):
    """Images over the per-row budget are uploaded; text stays inline."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    asset_response = {
        "assetId": asset_id,
        "uploadUrl": upload_url,
        "contentType": "image/jpeg",
        "description": "test image",
    }
    big_image = generate_image(approx_kbytes=300)
    long_text = 200_000 * "!"
    dataset = {
        "rows": [
            {
                "text_input": [
                    {"text": long_text},
                    {"image_url": {"url": big_image}},
                ]
            }
        ]
    }

    uploader = NvcfUploader(threshold_kbytes=threshold_kbytes, nvcf_api_key=api_key)

    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=asset_response
    )
    requests_mock.put(upload_url)
    result = uploader.upload_large_assets(dataset)

    prompts = result["rows"][0]["text_input"]
    assert "text" in prompts[0], "prompts order not preserved"
    assert prompts[0]["text"] == long_text, "text asset should not be uploaded"
    assert "image_url" in prompts[1], "prompts order not preserved"
    assert prompts[1]["image_url"]["url"] == f"data:image/bmp;asset_id,{asset_id}"


def test_payload_is_closer_to_threshold(requests_mock):
    """Greedy selection keeps the largest image that fits and uploads the rest."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    asset_response = {
        "assetId": asset_id,
        "uploadUrl": upload_url,
        "contentType": "image/jpeg",
        "description": "test image",
    }
    big_image = generate_image(approx_kbytes=300)
    small_image = generate_image(approx_kbytes=200)
    dataset = {
        "rows": [
            {
                "text_input": [
                    {"image_url": {"url": big_image}},
                    {"image_url": {"url": small_image}},
                ]
            }
        ]
    }

    uploader = NvcfUploader(nvcf_api_key=api_key, threshold_kbytes=400)

    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=asset_response
    )
    requests_mock.put(upload_url)
    result = uploader.upload_large_assets(dataset)

    prompts = result["rows"][0]["text_input"]
    assert (
        prompts[1]["image_url"]["url"] == f"data:image/bmp;asset_id,{asset_id}"
    ), "smaller image should be uploaded"
    assert (
        prompts[0]["image_url"]["url"] == big_image
    ), "larger image should not be uploaded"


def test_upload_report(requests_mock):
    """Every completed upload must be recorded in the upload report."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    asset_response = {
        "assetId": asset_id,
        "uploadUrl": upload_url,
        "contentType": "image/jpeg",
        "description": "test image",
    }
    big_image = generate_image(approx_kbytes=300)
    dataset = {
        "rows": [
            {"text_input": [{"image_url": {"url": big_image}}]},
        ]
    }

    uploader = NvcfUploader(nvcf_api_key=api_key, threshold_kbytes=200)

    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=asset_response
    )
    requests_mock.put(upload_url)
    uploader.upload_large_assets(dataset)

    report = uploader.get_upload_report()
    assert asset_id in report, "file upload not recorded"
Loading