Fix vLLM backend, add JSON input file check

triton-inference-server · Jul 15, 2024 · 180e30f
1 parent 8872014
commit 180e30f
Show file tree

Hide file tree

Showing 2 changed files with 27 additions and 23 deletions.
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/dataset_retriever.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/dataset_retriever.py
@@ -45,10 +45,22 @@ def from_url(url: str, starting_index: int, length: int) -> List[Dict[str, Any]]
     def from_file(file_path: Path) -> List[Dict[str, str]]:
         with open(file_path, "r") as file:
             data = [load_json_str(line) for line in file]
-        if not all(isinstance(item, dict) for item in data):
-            raise GenAIPerfException("File content is not in the expected format.")
 
-        return [{"text_input": item.get("text_input", "")} for item in data]
+            for item in data:
+                if not isinstance(item, dict):
+                    raise GenAIPerfException(
+                        "File content is not in the expected format."
+                    )
+                if "text_input" not in item:
+                    raise GenAIPerfException(
+                        "Missing 'text_input' field in one or more items."
+                    )
+                if len(item) != 1 or "text_input" not in item:
+                    raise GenAIPerfException(
+                        "Each item must only contain the 'text_input' field."
+                    )
+
+            return [{"text_input": item["text_input"]} for item in data]
 
     @staticmethod
     def from_synthetic(

diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/output_format_converter.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/output_format_converter.py
@@ -217,28 +217,20 @@ def convert(
         model_name: list,
         model_selection_strategy: ModelSelectionStrategy,
     ) -> Dict:
-        pa_json: Dict = {"data": []}
-
+        data = []
         for index, row in enumerate(generic_dataset["rows"]):
-            text_content = row["row"]["text_input"]
             model = self._select_model_name(model_name, index, model_selection_strategy)
-            payload = {"text_input": text_content, "model": model}
-
-            if add_stream:
-                payload["stream"] = True
-            if output_tokens_mean != -1:
-                max_tokens = int(
-                    max(0, random.gauss(output_tokens_mean, output_tokens_stddev))
-                )
-                sampling_parameters = {"max_tokens": max_tokens}
-                if output_tokens_deterministic:
-                    sampling_parameters["min_tokens"] = max_tokens
-                payload["sampling_parameters"] = json.dumps(sampling_parameters)
-            payload.update(extra_inputs)
-
-            pa_json["data"].append(payload)
-
-        return pa_json
+            text_input = row["row"]["text_input"]
+
+            data.append(
+                {
+                    "text_input": [text_input],
+                    "model": model,
+                    "exclude_input_in_output": [True],
+                    **extra_inputs,
+                }
+            )
+        return {"data": data}
 
 
 class TensorRTLLMConverter(BaseConverter):