diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py index 5f2043d41..c17d6a15a 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py @@ -4,30 +4,18 @@ from pathlib import Path from typing import Dict +import genai_perf.logging as logging import requests -def knapsack(S, sizes): - n = len(sizes) - dp = [[0] * (S + 1) for _ in range(n + 1)] - - for i in range(1, n + 1): - for j in range(S + 1): - if sizes[i - 1] <= j: - dp[i][j] = max(dp[i - 1][j], dp[i - 1][j - sizes[i - 1]] + sizes[i - 1]) - else: - dp[i][j] = dp[i - 1][j] - - # Backtracking to find the items included in the optimal solution - result = [] - w = S - for i in range(n, 0, -1): - if dp[i][w] != dp[i - 1][w]: - result.append(i - 1) - w -= sizes[i - 1] - - result.reverse() # The list of indices is reversed to be in the original order - return result +def greedy_fill(size_limit, sizes): + remaining = size_limit + selected = [] + for i, size in sorted(enumerate(sizes), key=lambda x: -x[1]): + if size <= remaining: + selected.append(i) + remaining -= size + return selected NVCF_URL = "https://api.nvcf.nvidia.com/v2/nvcf" @@ -53,23 +41,23 @@ def get_upload_report(self): return self._upload_report.copy() def upload_large_assets(self, dataset: Dict): - sizes, entries = self._find_uploadable(dataset) - non_uploadable = self._calculate_data_size(dataset) - sum(sizes) - payload_limit = self.threshold_kbytes * 1000 - non_uploadable - take = knapsack(payload_limit, sizes) - upload = set(range(len(entries))) - set(take) - for entry in (entries[i] for i in upload): - self._upload_image(entry) + for row in dataset["rows"]: + sizes, entries = self._find_uploadable(row) + non_uploadable = self._calculate_data_size(row) - sum(sizes) + payload_limit = max(0, self.threshold_kbytes * 1000 - non_uploadable) + take = greedy_fill(payload_limit, sizes) + upload = set(range(len(entries))) - set(take) + for entry in (entries[i] for i in upload): + self._upload_image(entry) return dataset def _calculate_data_size(self, data): return len(json.dumps(data)) - def _find_uploadable(self, dataset): + def _find_uploadable(self, row): found = zip( *( (self._calculate_data_size(entry), entry) - for row in dataset["rows"] for entry in row.get("text_input", {}) if "image_url" in entry ) @@ -89,7 +77,7 @@ def _decode_base64_img_url(self, data): def _upload_image_to_nvcf(self, data, img_format): json = { - "contentType": "image/{img_format}", + "contentType": f"image/{img_format}", "description": "GenAI-perf synthetic image", } new_asset_resp = requests.post( @@ -102,13 +90,16 @@ def _upload_image_to_nvcf(self, data, img_format): upload_resp = requests.put( new_asset_resp["uploadUrl"], headers=upload_headers, data=data ) + print( + f"Uploaded asset {new_asset_resp['assetId']} with status {upload_resp.status_code}" + ) return new_asset_resp["assetId"] def _upload_image(self, data): - img_format, img = self._decode_base64_img_url(data["image_url"]) + img_format, img = self._decode_base64_img_url(data["image_url"]["url"]) start_time = time.perf_counter() asset_id = self._upload_image_to_nvcf(img, img_format) - data["image_url"] = f"data:image/{img_format};asset_id,{asset_id}" + data["image_url"]["url"] = f"data:image/{img_format};asset_id,{asset_id}" end_time = time.perf_counter() self._add_upload_report_entry(asset_id, end_time - start_time) diff --git a/src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py b/src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py index 4bb16833b..77e93742b 100644 --- a/src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py +++ b/src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py @@ -2,6 +2,7 @@ import json from io import BytesIO +import pytest from genai_perf.llm_inputs.nvcf_assets import NvcfUploader from PIL import Image @@ -32,7 +33,43 @@ def generate_image(approx_kbytes): return f"data:image/bmp;base64,{data}" -def test_upload_images(requests_mock): +def test_threshold_applies_for_each_row_independently(requests_mock): + DUMMY_API_KEY = "test api key" + image_300kb = generate_image(approx_kbytes=300) + image_200kb = generate_image(approx_kbytes=200) + input_dataset = { + "rows": [ + { + "text_input": [ + {"image_url": {"url": image_300kb}}, + ] + }, + { + "text_input": [ + {"image_url": {"url": image_200kb}}, + ] + }, + ] + } + + sut = NvcfUploader(threshold_kbytes=400, nvcf_api_key=DUMMY_API_KEY) + + new_dataset = sut.upload_large_assets(input_dataset) + + rows = new_dataset["rows"] + assert ( + rows[0]["text_input"][0]["image_url"]["url"] == image_300kb + ), "300kb asset should not be uploaded" + assert ( + rows[1]["text_input"][0]["image_url"]["url"] == image_200kb + ), "200kb asset should not be uploaded" + + +@pytest.mark.parametrize( + "threshold_kbytes", + [100, 400], +) +def test_upload_images(requests_mock, threshold_kbytes): DUMMY_API_KEY = "test api key" DUMMY_ASSET_ID = "dummy asset id" DUMMY_UPLOAD_URL = "https://dummy-upload-url" @@ -49,13 +86,13 @@ def test_upload_images(requests_mock): { "text_input": [ {"text": text_200kb}, - {"image_url": image_300kb}, + {"image_url": {"url": image_300kb}}, ] } ] } - sut = NvcfUploader(threshold_kbytes=400, nvcf_api_key=DUMMY_API_KEY) + sut = NvcfUploader(threshold_kbytes=threshold_kbytes, nvcf_api_key=DUMMY_API_KEY) requests_mock.post( "https://api.nvcf.nvidia.com/v2/nvcf/assets", json=NEW_ASSET_RESP @@ -63,11 +100,11 @@ def test_upload_images(requests_mock): requests_mock.put(DUMMY_UPLOAD_URL) new_dataset = sut.upload_large_assets(input_dataset) - rows = new_dataset["rows"][0]["text_input"] - assert "text" in rows[0], "prompts order not preserved" - assert rows[0]["text"] == text_200kb, "text asset should not be uploaded" - assert "image_url" in rows[1], "prompts order not preserved" - assert rows[1]["image_url"] == f"data:image/bmp;asset_id,{DUMMY_ASSET_ID}" + prompts = new_dataset["rows"][0]["text_input"] + assert "text" in prompts[0], "prompts order not preserved" + assert prompts[0]["text"] == text_200kb, "text asset should not be uploaded" + assert "image_url" in prompts[1], "prompts order not preserved" + assert prompts[1]["image_url"]["url"] == f"data:image/bmp;asset_id,{DUMMY_ASSET_ID}" def test_payload_is_closer_to_threshold(requests_mock): @@ -86,8 +123,8 @@ def test_payload_is_closer_to_threshold(requests_mock): "rows": [ { "text_input": [ - {"image_url": image_300kb}, - {"image_url": image_200kb}, + {"image_url": {"url": image_300kb}}, + {"image_url": {"url": image_200kb}}, ] } ] @@ -101,11 +138,13 @@ def test_payload_is_closer_to_threshold(requests_mock): requests_mock.put(DUMMY_UPLOAD_URL) new_dataset = sut.upload_large_assets(input_dataset) - rows = new_dataset["rows"][0]["text_input"] + prompts = new_dataset["rows"][0]["text_input"] assert ( - rows[1]["image_url"] == f"data:image/bmp;asset_id,{DUMMY_ASSET_ID}" + prompts[1]["image_url"]["url"] == f"data:image/bmp;asset_id,{DUMMY_ASSET_ID}" ), "smaller image should be uploaded" - assert rows[0]["image_url"] == image_300kb, "larger image should not be uploaded" + assert ( + prompts[0]["image_url"]["url"] == image_300kb + ), "larger image should not be uploaded" def test_upload_report(requests_mock): @@ -123,7 +162,7 @@ def test_upload_report(requests_mock): "rows": [ { "text_input": [ - {"image_url": image_300kb}, + {"image_url": {"url": image_300kb}}, ] } ]