Skip to content

Commit

Permalink
NVCF assets uploader
Browse files Browse the repository at this point in the history
  • Loading branch information
mwawrzos authored and nv-hwoo committed Jul 23, 2024
1 parent 3e1dbb1 commit 47ffde2
Show file tree
Hide file tree
Showing 3 changed files with 256 additions and 0 deletions.
114 changes: 114 additions & 0 deletions src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/nvcf_assets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,114 @@
import base64
import json
import time
from pathlib import Path
from typing import Dict

import requests


def knapsack(S, sizes):
    """Return indices of a subset of `sizes` with maximal total size <= S.

    Classic 0/1 knapsack where each item's value equals its size, i.e. we
    pack as many bytes as possible into a capacity of `S` bytes.

    Args:
        S: capacity in bytes. A non-positive capacity selects nothing.
        sizes: list of non-negative item sizes.

    Returns:
        Indices of the chosen items, in their original order.
    """
    # Guard: with a negative S, [0] * (S + 1) builds empty DP rows and the
    # backtracking below would raise IndexError. The caller can legitimately
    # produce a negative capacity (threshold smaller than fixed payload).
    if S <= 0 or not sizes:
        return []

    n = len(sizes)
    dp = [[0] * (S + 1) for _ in range(n + 1)]

    for i in range(1, n + 1):
        size = sizes[i - 1]  # hoist the per-item lookup out of the inner loop
        for j in range(S + 1):
            if size <= j:
                dp[i][j] = max(dp[i - 1][j], dp[i - 1][j - size] + size)
            else:
                dp[i][j] = dp[i - 1][j]

    # Backtracking to find the items included in the optimal solution:
    # a change in dp value between rows i-1 and i means item i-1 was taken.
    result = []
    w = S
    for i in range(n, 0, -1):
        if dp[i][w] != dp[i - 1][w]:
            result.append(i - 1)
            w -= sizes[i - 1]

    result.reverse()  # The list of indices is reversed to be in the original order
    return result


# Base endpoint of the NVCF REST API; assets are created under {NVCF_URL}/assets.
NVCF_URL = "https://api.nvcf.nvidia.com/v2/nvcf"


class NvcfUploader:
    """Uploads oversized dataset images to NVCF asset storage.

    Image entries that do not fit within the configured payload threshold are
    uploaded via the NVCF assets API and replaced in-place with an
    ``asset_id`` reference URL. Upload durations are recorded per asset.
    """

    def __init__(self, threshold_kbytes: int, nvcf_api_key: str):
        """
        Args:
            threshold_kbytes: target inline-payload limit, in kilobytes
                (1 kbyte == 1000 bytes, see upload_large_assets).
            nvcf_api_key: bearer token for the NVCF REST API.
        """
        self.threshold_kbytes = threshold_kbytes
        # Maps asset_id -> upload duration in seconds.
        self._upload_report: Dict[str, float] = {}
        self._initialize_headers(nvcf_api_key)

    def _initialize_headers(self, nvcf_api_key):
        # Headers for the JSON asset-creation endpoint (not the PUT upload).
        self._headers = {
            "Authorization": f"Bearer {nvcf_api_key}",
            "accept": "application/json",
            "content-type": "application/json",
        }

    def _add_upload_report_entry(self, asset_id, time_delta):
        self._upload_report[asset_id] = time_delta

    def get_upload_report(self):
        """Return a copy of the {asset_id: upload_seconds} report."""
        return self._upload_report.copy()

    def upload_large_assets(self, dataset: Dict):
        """Upload images so that the remaining inline payload fits the threshold.

        Runs a knapsack over the image-entry sizes to keep as much data inline
        as possible; every image that does not fit is uploaded and its entry
        is mutated to an asset_id reference.

        Returns:
            The same dataset object, mutated in place.
        """
        sizes, entries = self._find_uploadable(dataset)
        non_uploadable = self._calculate_data_size(dataset) - sum(sizes)
        # Clamp: the fixed (non-image) payload alone may already exceed the
        # threshold, which would yield a negative knapsack capacity.
        payload_limit = max(0, self.threshold_kbytes * 1000 - non_uploadable)
        keep = knapsack(payload_limit, sizes)
        upload = set(range(len(entries))) - set(keep)
        for entry in (entries[i] for i in upload):
            self._upload_image(entry)
        return dataset

    def _calculate_data_size(self, data):
        # Size is measured as serialized-JSON length, i.e. what actually
        # goes over the wire.
        return len(json.dumps(data))

    def _find_uploadable(self, dataset):
        """Return ``(sizes, entries)`` for all image entries in the dataset."""
        found = zip(
            *(
                (self._calculate_data_size(entry), entry)
                for row in dataset["rows"]
                for entry in row.get("text_input", {})
                if "image_url" in entry
            )
        )
        found = list(found)
        if not found:
            # zip(*()) yields nothing; normalize to a pair of empty lists.
            return [], []
        else:
            return found

    def _decode_base64_img_url(self, data):
        """Split a ``data:image/<fmt>;base64,<payload>`` URL into (fmt, bytes)."""
        prefix, payload = data.split(";")
        _, img_format = prefix.split("/")
        _, img_base64 = payload.split(",")
        img = base64.b64decode(img_base64)
        return img_format, img

    def _upload_image_to_nvcf(self, data, img_format):
        """Create an NVCF asset and PUT the image bytes to its upload URL.

        Returns:
            The new asset's id.

        Raises:
            requests.HTTPError: if either HTTP call fails.
        """
        # Renamed from `json` to avoid shadowing the imported json module.
        asset_spec = {
            # BUG FIX: was the literal string "image/{img_format}" (missing
            # f-prefix), so the placeholder was sent verbatim as contentType.
            "contentType": f"image/{img_format}",
            "description": "GenAI-perf synthetic image",
        }
        new_asset_resp = requests.post(
            f"{NVCF_URL}/assets", headers=self._headers, json=asset_spec
        )
        new_asset_resp.raise_for_status()  # fail loudly instead of KeyError below
        new_asset = new_asset_resp.json()
        upload_headers = {
            "Content-Type": asset_spec["contentType"],
            "x-amz-meta-nvcf-asset-description": asset_spec["description"],
        }
        upload_resp = requests.put(
            new_asset["uploadUrl"], headers=upload_headers, data=data
        )
        upload_resp.raise_for_status()  # result was previously ignored
        return new_asset["assetId"]

    def _upload_image(self, data):
        """Upload one image entry and rewrite its URL to an asset reference."""
        img_format, img = self._decode_base64_img_url(data["image_url"])

        start_time = time.perf_counter()
        asset_id = self._upload_image_to_nvcf(img, img_format)
        data["image_url"] = f"data:image/{img_format};asset_id,{asset_id}"
        end_time = time.perf_counter()
        self._add_upload_report_entry(asset_id, end_time - start_time)
1 change: 1 addition & 0 deletions src/c++/perf_analyzer/genai-perf/pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -48,6 +48,7 @@ requires-python = ">=3.8,<4"
dependencies = [
"numpy<2",
"pytest",
"requests-mock",
"rich",
"transformers",
"plotly",
Expand Down
141 changes: 141 additions & 0 deletions src/c++/perf_analyzer/genai-perf/tests/test_nvcf_assets.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,141 @@
import base64
import json
from io import BytesIO

from genai_perf.llm_inputs.nvcf_assets import NvcfUploader
from PIL import Image


def test_not_upload_text():
    """A dataset containing only text must pass through completely unchanged."""
    api_key = "test api key"
    dataset = {
        "rows": [{"text_input": "small message"}],
    }
    uploader = NvcfUploader(threshold_kbytes=0, nvcf_api_key=api_key)

    result = uploader.upload_large_assets(dataset)

    assert (
        result == dataset
    ), "There is no row to upload - dataset should stay unchanged"


def generate_image(approx_kbytes):
    """Return a base64 data-URL for a white BMP of roughly `approx_kbytes` kB."""
    base64_overhead = 4 / 3  # Base64 encoding increases size by about 33%
    channels = 3  # RGB: three bytes per pixel
    pixel_budget = approx_kbytes * 1000 / channels / base64_overhead
    side = int(pixel_budget**0.5)  # square image: width == height
    canvas = Image.new("RGB", (side, side), color="white")
    raw = BytesIO()
    canvas.save(raw, format="BMP")  # BMP doesn't compress, so size is predictable
    encoded = base64.b64encode(raw.getvalue()).decode("utf-8")
    return f"data:image/bmp;base64,{encoded}"


def test_upload_images(requests_mock):
    """Only the oversized image is uploaded; text and entry order are kept."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    text_200kb = 200_000 * "!"
    image_300kb = generate_image(approx_kbytes=300)
    input_dataset = {
        "rows": [
            {
                "text_input": [
                    {"text": text_200kb},
                    {"image_url": image_300kb},
                ]
            }
        ]
    }
    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets",
        json={
            "assetId": asset_id,
            "uploadUrl": upload_url,
            "contentType": "image/jpeg",
            "description": "test image",
        },
    )
    requests_mock.put(upload_url)

    sut = NvcfUploader(threshold_kbytes=400, nvcf_api_key=api_key)
    new_dataset = sut.upload_large_assets(input_dataset)

    prompts = new_dataset["rows"][0]["text_input"]
    assert "text" in prompts[0], "prompts order not preserved"
    assert prompts[0]["text"] == text_200kb, "text asset should not be uploaded"
    assert "image_url" in prompts[1], "prompts order not preserved"
    assert prompts[1]["image_url"] == f"data:image/bmp;asset_id,{asset_id}"


def test_payload_is_closer_to_threshold(requests_mock):
    """When both images exceed the budget, the smaller one is uploaded."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    image_300kb = generate_image(approx_kbytes=300)
    image_200kb = generate_image(approx_kbytes=200)
    input_dataset = {
        "rows": [
            {
                "text_input": [
                    {"image_url": image_300kb},
                    {"image_url": image_200kb},
                ]
            }
        ]
    }
    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets",
        json={
            "assetId": asset_id,
            "uploadUrl": upload_url,
            "contentType": "image/jpeg",
            "description": "test image",
        },
    )
    requests_mock.put(upload_url)

    sut = NvcfUploader(nvcf_api_key=api_key, threshold_kbytes=400)
    new_dataset = sut.upload_large_assets(input_dataset)

    prompts = new_dataset["rows"][0]["text_input"]
    assert (
        prompts[1]["image_url"] == f"data:image/bmp;asset_id,{asset_id}"
    ), "smaller image should be uploaded"
    assert prompts[0]["image_url"] == image_300kb, "larger image should not be uploaded"


def test_upload_report(requests_mock):
    """Every uploaded asset must be recorded in the upload report."""
    api_key = "test api key"
    asset_id = "dummy asset id"
    upload_url = "https://dummy-upload-url"
    input_dataset = {
        "rows": [
            {
                "text_input": [
                    {"image_url": generate_image(approx_kbytes=300)},
                ]
            }
        ]
    }
    requests_mock.post(
        "https://api.nvcf.nvidia.com/v2/nvcf/assets",
        json={
            "assetId": asset_id,
            "uploadUrl": upload_url,
            "contentType": "image/jpeg",
            "description": "test image",
        },
    )
    requests_mock.put(upload_url)

    sut = NvcfUploader(nvcf_api_key=api_key, threshold_kbytes=200)
    sut.upload_large_assets(input_dataset)

    assert asset_id in sut.get_upload_report(), "file upload not recorded"

0 comments on commit 47ffde2

Please sign in to comment.