diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/dataset_retriever.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/dataset_retriever.py
index 71970e48a..4b89cd24f 100644
--- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/dataset_retriever.py
+++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/dataset_retriever.py
@@ -45,10 +45,22 @@ def from_url(url: str, starting_index: int, length: int) -> List[Dict[str, Any]]
     def from_file(file_path: Path) -> List[Dict[str, str]]:
         with open(file_path, "r") as file:
             data = [load_json_str(line) for line in file]
 
-        if not all(isinstance(item, dict) for item in data):
-            raise GenAIPerfException("File content is not in the expected format.")
-        return [{"text_input": item.get("text_input", "")} for item in data]
+        for item in data:
+            if not isinstance(item, dict):
+                raise GenAIPerfException(
+                    "File content is not in the expected format."
+                )
+            if "text_input" not in item:
+                raise GenAIPerfException(
+                    "Missing 'text_input' field in one or more items."
+                )
+            if len(item) != 1:
+                raise GenAIPerfException(
+                    "Each item must only contain the 'text_input' field."
+                )
+
+        return [{"text_input": item["text_input"]} for item in data]
 
     @staticmethod
     def from_synthetic(
diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/output_format_converter.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/output_format_converter.py
index 16e609d8a..852a43ddf 100644
--- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/output_format_converter.py
+++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/output_format_converter.py
@@ -217,28 +217,20 @@ def convert(
         model_name: list,
         model_selection_strategy: ModelSelectionStrategy,
     ) -> Dict:
-        pa_json: Dict = {"data": []}
-
+        data = []
         for index, row in enumerate(generic_dataset["rows"]):
-            text_content = row["row"]["text_input"]
             model = self._select_model_name(model_name, index, model_selection_strategy)
-            payload = {"text_input": text_content, "model": model}
-
-            if add_stream:
-                payload["stream"] = True
-            if output_tokens_mean != -1:
-                max_tokens = int(
-                    max(0, random.gauss(output_tokens_mean, output_tokens_stddev))
-                )
-                sampling_parameters = {"max_tokens": max_tokens}
-                if output_tokens_deterministic:
-                    sampling_parameters["min_tokens"] = max_tokens
-                payload["sampling_parameters"] = json.dumps(sampling_parameters)
-            payload.update(extra_inputs)
-
-            pa_json["data"].append(payload)
-
-        return pa_json
+            text_input = row["row"]["text_input"]
+
+            data.append(
+                {
+                    "text_input": [text_input],
+                    "model": model,
+                    "exclude_input_in_output": [True],
+                    **extra_inputs,
+                }
+            )
+        return {"data": data}
 
 
 class TensorRTLLMConverter(BaseConverter):