-
Notifications
You must be signed in to change notification settings - Fork 234
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
NVCF assets support #755
Closed
+286
−0
Closed
NVCF assets support #755
Changes from all commits
Commits
Show all changes
2 commits
Select commit
Hold shift + click to select a range
File filter
Filter by extension
Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
There are no files selected for viewing
105 changes: 105 additions & 0 deletions
105
src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,105 @@ | ||
import base64 | ||
import json | ||
import time | ||
from pathlib import Path | ||
from typing import Dict | ||
|
||
import genai_perf.logging as logging | ||
Check notice Code scanning / CodeQL Unused import Note
Import of 'logging' is not used.
|
||
import requests | ||
|
||
|
||
def greedy_fill(size_limit, sizes):
    """Select indices of ``sizes`` that fit within ``size_limit``, biggest first.

    Greedy knapsack fill: items are considered in decreasing size order and
    taken whenever they still fit in the remaining budget.  Returns the list
    of selected indices in the order they were taken.
    """
    budget = size_limit
    chosen = []
    by_size_desc = sorted(enumerate(sizes), key=lambda pair: pair[1], reverse=True)
    for index, item_size in by_size_desc:
        if item_size <= budget:
            chosen.append(index)
            budget -= item_size
    return chosen
|
||
|
||
NVCF_URL = "https://api.nvcf.nvidia.com/v2/nvcf" | ||
|
||
|
||
class NvcfUploader:
    """Uploads oversized dataset images to NVCF asset storage.

    Rows in a dataset may exceed a per-row payload budget; images that do not
    fit are uploaded to NVCF and their inline base64 data URLs are rewritten
    in-place to asset-id references.
    """

    def __init__(self, threshold_kbytes: int, nvcf_api_key: str):
        """
        Args:
            threshold_kbytes: per-row payload budget in kilobytes
                (1 kB = 1000 bytes, see upload_large_assets).
            nvcf_api_key: bearer token for the NVCF REST API.
        """
        self.threshold_kbytes = threshold_kbytes
        # Maps asset_id -> upload duration in seconds.
        self._upload_report: Dict[str, float] = {}
        self._initialize_headers(nvcf_api_key)

    def _initialize_headers(self, nvcf_api_key: str) -> None:
        # Headers for the JSON control-plane calls (asset creation).
        self._headers = {
            "Authorization": f"Bearer {nvcf_api_key}",
            "accept": "application/json",
            "content-type": "application/json",
        }

    def _add_upload_report_entry(self, asset_id: str, time_delta: float) -> None:
        self._upload_report[asset_id] = time_delta

    def get_upload_report(self) -> Dict[str, float]:
        """Return a copy of the {asset_id: upload_seconds} report."""
        return self._upload_report.copy()

    def upload_large_assets(self, dataset: Dict) -> Dict:
        """Upload images that do not fit each row's payload budget.

        Mutates ``dataset`` in place (image URLs are replaced with asset-id
        references) and returns it.
        """
        for row in dataset["rows"]:
            sizes, entries = self._find_uploadable(row)
            # Bytes consumed by content that can never be uploaded (e.g. text).
            non_uploadable = self._calculate_data_size(row) - sum(sizes)
            payload_limit = max(0, self.threshold_kbytes * 1000 - non_uploadable)
            # Keep inline as many images as fit; upload the rest.
            take = greedy_fill(payload_limit, sizes)
            upload = set(range(len(entries))) - set(take)
            for entry in (entries[i] for i in upload):
                self._upload_image(entry)
        return dataset

    def _calculate_data_size(self, data) -> int:
        # Size of the JSON-serialized form, which is what is actually sent.
        return len(json.dumps(data))

    def _find_uploadable(self, row):
        """Return ([size, ...], [entry, ...]) for image entries in the row."""
        found = zip(
            *(
                (self._calculate_data_size(entry), entry)
                for entry in row.get("text_input", {})
                if "image_url" in entry
            )
        )
        found = list(found)
        if not found:
            return [], []
        else:
            return found

    def _decode_base64_img_url(self, data: str):
        """Split a "data:image/<fmt>;base64,<payload>" URL into (fmt, bytes)."""
        prefix, payload = data.split(";")
        _, img_format = prefix.split("/")
        _, img_base64 = payload.split(",")
        img = base64.b64decode(img_base64)
        return img_format, img

    def _upload_image_to_nvcf(self, data: bytes, img_format: str) -> str:
        """Create an NVCF asset, PUT the image bytes, return the asset id.

        Raises:
            requests.HTTPError: if asset creation or the upload PUT fails.
        """
        # Renamed from `json` to avoid shadowing the imported json module.
        asset_spec = {
            "contentType": f"image/{img_format}",
            "description": "GenAI-perf synthetic image",
        }
        create_resp = requests.post(
            f"{NVCF_URL}/assets", headers=self._headers, json=asset_spec
        )
        # Fail fast with a clear HTTP error instead of a confusing JSON decode error.
        create_resp.raise_for_status()
        new_asset_resp = create_resp.json()
        upload_headers = {
            "Content-Type": asset_spec["contentType"],
            "x-amz-meta-nvcf-asset-description": asset_spec["description"],
        }
        upload_resp = requests.put(
            new_asset_resp["uploadUrl"], headers=upload_headers, data=data
        )
        # Surface failed uploads instead of silently printing an error status.
        upload_resp.raise_for_status()
        print(
            f"Uploaded asset {new_asset_resp['assetId']} with status {upload_resp.status_code}"
        )
        return new_asset_resp["assetId"]

    def _upload_image(self, data: Dict) -> None:
        """Upload one image entry and rewrite its URL to an asset reference."""
        img_format, img = self._decode_base64_img_url(data["image_url"]["url"])

        start_time = time.perf_counter()
        asset_id = self._upload_image_to_nvcf(img, img_format)
        data["image_url"]["url"] = f"data:image/{img_format};asset_id,{asset_id}"
        end_time = time.perf_counter()
        self._add_upload_report_entry(asset_id, end_time - start_time)
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
180 changes: 180 additions & 0 deletions
180
src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,180 @@ | ||
import base64 | ||
import json | ||
Check notice Code scanning / CodeQL Unused import Note test
Import of 'json' is not used.
|
||
from io import BytesIO | ||
|
||
import pytest | ||
from genai_perf.llm_inputs.nvcf_assets import NvcfUploader | ||
from PIL import Image | ||
|
||
|
||
def test_not_upload_text():
    """A dataset containing only text must pass through completely unchanged."""
    api_key = "test api key"
    dataset = {
        "rows": [{"text_input": "small message"}],
    }
    uploader = NvcfUploader(threshold_kbytes=0, nvcf_api_key=api_key)

    result = uploader.upload_large_assets(dataset)

    assert (
        result == dataset
    ), "There is no row to upload - dataset should stay unchanged"
|
||
|
||
def generate_image(approx_kbytes):
    """Build a base64 data-URL BMP image of roughly ``approx_kbytes`` kB."""
    base64_ratio = 4 / 3  # Base64 encoding increases size by about 33%
    channels = 3  # bytes per RGB pixel
    total_pixels = approx_kbytes * 1000 / channels / base64_ratio
    side = int(total_pixels**0.5)
    image = Image.new("RGB", (side, side), color="white")
    buffer = BytesIO()
    image.save(buffer, format="BMP")  # BMP doesn't compress
    encoded = base64.b64encode(buffer.getvalue()).decode("utf-8")
    return f"data:image/bmp;base64,{encoded}"
|
||
|
||
def test_threshold_applies_for_each_row_independently(requests_mock):
    """Each row gets its own payload budget, so neither image is uploaded."""
    api_key = "test api key"
    big_image = generate_image(approx_kbytes=300)
    small_image = generate_image(approx_kbytes=200)
    dataset = {
        "rows": [
            {"text_input": [{"image_url": {"url": big_image}}]},
            {"text_input": [{"image_url": {"url": small_image}}]},
        ]
    }

    uploader = NvcfUploader(threshold_kbytes=400, nvcf_api_key=api_key)

    result = uploader.upload_large_assets(dataset)

    rows = result["rows"]
    assert (
        rows[0]["text_input"][0]["image_url"]["url"] == big_image
    ), "300kb asset should not be uploaded"
    assert (
        rows[1]["text_input"][0]["image_url"]["url"] == small_image
    ), "200kb asset should not be uploaded"
|
||
|
||
@pytest.mark.parametrize(
    "threshold_kbytes",
    [100, 400],
)
def test_upload_images(requests_mock, threshold_kbytes):
    """An over-budget image is uploaded; plain text is never uploaded."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    asset_response = {
        "assetId": asset_id,
        "uploadUrl": upload_url,
        "contentType": "image/jpeg",
        "description": "test image",
    }
    image = generate_image(approx_kbytes=300)
    long_text = 200_000 * "!"
    dataset = {
        "rows": [
            {
                "text_input": [
                    {"text": long_text},
                    {"image_url": {"url": image}},
                ]
            }
        ]
    }

    uploader = NvcfUploader(threshold_kbytes=threshold_kbytes, nvcf_api_key=api_key)

    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=asset_response
    )
    requests_mock.put(upload_url)
    result = uploader.upload_large_assets(dataset)

    prompts = result["rows"][0]["text_input"]
    assert "text" in prompts[0], "prompts order not preserved"
    assert prompts[0]["text"] == long_text, "text asset should not be uploaded"
    assert "image_url" in prompts[1], "prompts order not preserved"
    assert prompts[1]["image_url"]["url"] == f"data:image/bmp;asset_id,{asset_id}"
|
||
|
||
def test_payload_is_closer_to_threshold(requests_mock):
    """When both images cannot fit, the larger stays inline and the smaller
    is uploaded (greedy fill keeps the payload closest to the threshold)."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    asset_response = {
        "assetId": asset_id,
        "uploadUrl": upload_url,
        "contentType": "image/jpeg",
        "description": "test image",
    }
    big_image = generate_image(approx_kbytes=300)
    small_image = generate_image(approx_kbytes=200)
    dataset = {
        "rows": [
            {
                "text_input": [
                    {"image_url": {"url": big_image}},
                    {"image_url": {"url": small_image}},
                ]
            }
        ]
    }

    uploader = NvcfUploader(nvcf_api_key=api_key, threshold_kbytes=400)

    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=asset_response
    )
    requests_mock.put(upload_url)
    result = uploader.upload_large_assets(dataset)

    prompts = result["rows"][0]["text_input"]
    assert (
        prompts[1]["image_url"]["url"] == f"data:image/bmp;asset_id,{asset_id}"
    ), "smaller image should be uploaded"
    assert (
        prompts[0]["image_url"]["url"] == big_image
    ), "larger image should not be uploaded"
|
||
|
||
def test_upload_report(requests_mock):
    """Every uploaded asset id must appear in the upload report."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    asset_response = {
        "assetId": asset_id,
        "uploadUrl": upload_url,
        "contentType": "image/jpeg",
        "description": "test image",
    }
    image = generate_image(approx_kbytes=300)
    dataset = {
        "rows": [
            {"text_input": [{"image_url": {"url": image}}]},
        ]
    }

    uploader = NvcfUploader(nvcf_api_key=api_key, threshold_kbytes=200)

    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=asset_response
    )
    requests_mock.put(upload_url)
    uploader.upload_large_assets(dataset)

    report = uploader.get_upload_report()
    assert asset_id in report, "file upload not recorded"
Oops, something went wrong.
Add this suggestion to a batch that can be applied as a single commit.
This suggestion is invalid because no changes were made to the code.
Suggestions cannot be applied while the pull request is closed.
Suggestions cannot be applied while viewing a subset of changes.
Only one suggestion per line can be applied in a batch.
Add this suggestion to a batch that can be applied as a single commit.
Applying suggestions on deleted lines is not supported.
You must change the existing code in this line in order to create a valid suggestion.
Outdated suggestions cannot be applied.
This suggestion has been applied or marked resolved.
Suggestions cannot be applied from pending reviews.
Suggestions cannot be applied on multi-line comments.
Suggestions cannot be applied while the pull request is queued to merge.
Suggestion cannot be applied right now. Please check back later.
Check notice
Code scanning / CodeQL
Unused import Note