"""Upload oversized image assets to NVCF so request payloads stay small.

Entries whose inline (base64) images would push the JSON payload over a
configured threshold are uploaded to the NVCF asset service and replaced
in-place with an ``asset_id`` reference.
"""

import base64
import json
import time
from typing import Dict, List, Tuple

NVCF_URL = "https://api.nvcf.nvidia.com/v2/nvcf"


def knapsack(S, sizes):
    """Return indices of ``sizes`` with the largest total not exceeding ``S``.

    Classic 0/1 knapsack where each item's value equals its size.  Used to
    pick which images may stay inline in the payload (the rest get uploaded).

    Args:
        S: Capacity in bytes.  Non-positive capacity selects nothing.
        sizes: Size of each candidate item, in bytes.

    Returns:
        Indices into ``sizes``, in their original order.
    """
    # Guard: a negative capacity would create empty dp rows and crash the
    # backtracking step with an IndexError.
    if S <= 0:
        return []

    n = len(sizes)
    dp = [[0] * (S + 1) for _ in range(n + 1)]

    for i in range(1, n + 1):
        size = sizes[i - 1]  # hoist the per-item lookup out of the inner loop
        for j in range(S + 1):
            if size <= j:
                dp[i][j] = max(dp[i - 1][j], dp[i - 1][j - size] + size)
            else:
                dp[i][j] = dp[i - 1][j]

    # Backtrack to recover which items make up the optimal total.
    result = []
    w = S
    for i in range(n, 0, -1):
        if dp[i][w] != dp[i - 1][w]:
            result.append(i - 1)
            w -= sizes[i - 1]

    result.reverse()  # restore original item order
    return result


class NvcfUploader:
    """Replaces large inline images in a dataset with NVCF asset references.

    Images are uploaded only when keeping them inline would push the total
    payload above ``threshold_kbytes``.
    """

    def __init__(self, threshold_kbytes: int, nvcf_api_key: str):
        # Maximum payload size (in kilobytes) we try to stay under.
        self.threshold_kbytes = threshold_kbytes
        # asset_id -> upload duration in seconds.
        self._upload_report: Dict[str, float] = {}
        self._initialize_headers(nvcf_api_key)

    def _initialize_headers(self, nvcf_api_key):
        """Build the auth/content headers used for NVCF API calls."""
        self._headers = {
            "Authorization": f"Bearer {nvcf_api_key}",
            "accept": "application/json",
            "content-type": "application/json",
        }

    def _add_upload_report_entry(self, asset_id, time_delta):
        """Record how long the upload of ``asset_id`` took (seconds)."""
        self._upload_report[asset_id] = time_delta

    def get_upload_report(self):
        """Return a copy of the asset_id -> upload-duration mapping."""
        return self._upload_report.copy()

    def upload_large_assets(self, dataset: Dict):
        """Upload images that don't fit in the payload budget; mutate and
        return ``dataset`` with those entries rewritten to asset references.
        """
        sizes, entries = self._find_uploadable(dataset)
        # Bytes that can never be uploaded (text, JSON structure, ...).
        non_uploadable = self._calculate_data_size(dataset) - sum(sizes)
        # Budget left for inline images; clamp at zero so knapsack never
        # sees a negative capacity (then every image is uploaded).
        payload_limit = max(0, self.threshold_kbytes * 1000 - non_uploadable)
        keep_inline = set(knapsack(payload_limit, sizes))
        for i, entry in enumerate(entries):
            if i not in keep_inline:
                self._upload_image(entry)
        return dataset

    def _calculate_data_size(self, data):
        """Approximate the serialized size of ``data`` in bytes."""
        return len(json.dumps(data))

    def _find_uploadable(self, dataset) -> Tuple[List[int], List[Dict]]:
        """Collect image entries (dicts with an ``image_url`` key) and their
        serialized sizes.

        Rows whose ``text_input`` is a plain string are skipped: iterating a
        string yields single characters, which never contain ``image_url``.
        """
        entries = [
            entry
            for row in dataset["rows"]
            for entry in row.get("text_input", {})
            if "image_url" in entry
        ]
        sizes = [self._calculate_data_size(entry) for entry in entries]
        return sizes, entries

    def _decode_base64_img_url(self, data):
        """Split a ``data:image/<fmt>;base64,<payload>`` URL into
        (format, raw bytes)."""
        prefix, payload = data.split(";")
        _, img_format = prefix.split("/")
        _, img_base64 = payload.split(",")
        img = base64.b64decode(img_base64)
        return img_format, img

    def _upload_image_to_nvcf(self, data, img_format):
        """Create an NVCF asset and upload ``data`` to its pre-signed URL.

        Returns the new asset's id.  Raises ``requests.HTTPError`` if either
        HTTP call fails.
        """
        # Imported lazily so the payload-size logic is usable without the
        # ``requests`` dependency installed.
        import requests

        # BUGFIX: the content type was the literal string "image/{img_format}"
        # (missing f-prefix); the dict also shadowed the stdlib ``json`` module.
        asset_meta = {
            "contentType": f"image/{img_format}",
            "description": "GenAI-perf synthetic image",
        }
        new_asset_resp = requests.post(
            f"{NVCF_URL}/assets", headers=self._headers, json=asset_meta
        )
        new_asset_resp.raise_for_status()
        new_asset = new_asset_resp.json()

        upload_headers = {
            "Content-Type": asset_meta["contentType"],
            "x-amz-meta-nvcf-asset-description": asset_meta["description"],
        }
        upload_resp = requests.put(
            new_asset["uploadUrl"], headers=upload_headers, data=data
        )
        upload_resp.raise_for_status()
        return new_asset["assetId"]

    def _upload_image(self, data):
        """Upload one image entry and rewrite its ``image_url`` in place to an
        ``asset_id`` reference, recording the upload duration."""
        img_format, img = self._decode_base64_img_url(data["image_url"])

        start_time = time.perf_counter()
        asset_id = self._upload_image_to_nvcf(img, img_format)
        data["image_url"] = f"data:image/{img_format};asset_id,{asset_id}"
        end_time = time.perf_counter()
        self._add_upload_report_entry(asset_id, end_time - start_time)
"""Tests for NvcfUploader: upload selection, dataset rewriting, reporting."""

import base64
from io import BytesIO

from genai_perf.llm_inputs.nvcf_assets import NvcfUploader
from PIL import Image

# Shared fixtures: every test talks to the same fake NVCF asset service.
DUMMY_API_KEY = "test api key"
DUMMY_ASSET_ID = "dummy asset id"
DUMMY_UPLOAD_URL = "https://dummy-upload-url"
NEW_ASSET_RESP = {
    "assetId": DUMMY_ASSET_ID,
    "uploadUrl": DUMMY_UPLOAD_URL,
    "contentType": "image/jpeg",
    "description": "test image",
}


def _mock_nvcf_endpoints(requests_mock):
    """Stub asset creation (POST) and the pre-signed upload (PUT)."""
    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=NEW_ASSET_RESP
    )
    requests_mock.put(DUMMY_UPLOAD_URL)


def generate_image(approx_kbytes):
    """Return a data-URL BMP image of roughly ``approx_kbytes`` kilobytes."""
    estimated_base64_ratio = 4 / 3  # Base64 encoding increases size by about 33%
    color_channels = 3
    npixels = approx_kbytes * 1000 / color_channels / estimated_base64_ratio
    width = height = int(npixels**0.5)
    img = Image.new("RGB", (width, height), color="white")
    buffered = BytesIO()
    img.save(buffered, format="BMP")  # BMP doesn't compress
    data = base64.b64encode(buffered.getvalue()).decode("utf-8")
    return f"data:image/bmp;base64,{data}"


def test_not_upload_text():
    input_dataset = {
        "rows": [{"text_input": "small message"}],
    }
    sut = NvcfUploader(threshold_kbytes=0, nvcf_api_key=DUMMY_API_KEY)

    new_dataset = sut.upload_large_assets(input_dataset)

    assert (
        new_dataset == input_dataset
    ), "There is no row to upload - dataset should stay unchanged"


def test_upload_images(requests_mock):
    image_300kb = generate_image(approx_kbytes=300)
    text_200kb = 200_000 * "!"
    input_dataset = {
        "rows": [
            {
                "text_input": [
                    {"text": text_200kb},
                    {"image_url": image_300kb},
                ]
            }
        ]
    }

    sut = NvcfUploader(threshold_kbytes=400, nvcf_api_key=DUMMY_API_KEY)
    _mock_nvcf_endpoints(requests_mock)

    new_dataset = sut.upload_large_assets(input_dataset)

    rows = new_dataset["rows"][0]["text_input"]
    assert "text" in rows[0], "prompts order not preserved"
    assert rows[0]["text"] == text_200kb, "text asset should not be uploaded"
    assert "image_url" in rows[1], "prompts order not preserved"
    assert rows[1]["image_url"] == f"data:image/bmp;asset_id,{DUMMY_ASSET_ID}"


def test_payload_is_closer_to_threshold(requests_mock):
    image_300kb = generate_image(approx_kbytes=300)
    image_200kb = generate_image(approx_kbytes=200)
    input_dataset = {
        "rows": [
            {
                "text_input": [
                    {"image_url": image_300kb},
                    {"image_url": image_200kb},
                ]
            }
        ]
    }

    sut = NvcfUploader(nvcf_api_key=DUMMY_API_KEY, threshold_kbytes=400)
    _mock_nvcf_endpoints(requests_mock)

    new_dataset = sut.upload_large_assets(input_dataset)

    rows = new_dataset["rows"][0]["text_input"]
    assert (
        rows[1]["image_url"] == f"data:image/bmp;asset_id,{DUMMY_ASSET_ID}"
    ), "smaller image should be uploaded"
    assert rows[0]["image_url"] == image_300kb, "larger image should not be uploaded"


def test_upload_report(requests_mock):
    image_300kb = generate_image(approx_kbytes=300)
    input_dataset = {
        "rows": [
            {
                "text_input": [
                    {"image_url": image_300kb},
                ]
            }
        ]
    }

    sut = NvcfUploader(nvcf_api_key=DUMMY_API_KEY, threshold_kbytes=200)
    _mock_nvcf_endpoints(requests_mock)

    sut.upload_large_assets(input_dataset)

    report = sut.get_upload_report()
    assert DUMMY_ASSET_ID in report, "file upload not recorded"