diff --git a/src/c++/perf_analyzer/genai-perf/README.md b/src/c++/perf_analyzer/genai-perf/README.md index d9f288996..9c553115d 100644 --- a/src/c++/perf_analyzer/genai-perf/README.md +++ b/src/c++/perf_analyzer/genai-perf/README.md @@ -373,7 +373,7 @@ model config to not echo the input tokens in the output. (default: tensorrtllm) Set a custom endpoint that differs from the OpenAI defaults. (default: `None`) -##### `--endpoint-type {chat,completions,embeddings}` +##### `--endpoint-type {chat,completions,embeddings,rankings}` The endpoint-type to send requests to on the server. This is only used with the `openai` service-kind. (default: `None`) @@ -400,7 +400,8 @@ URL of the endpoint to target for benchmarking. (default: `None`) The batch size of the requests GenAI-Perf should send. This is currently only supported with the -[embeddings endpoint type](docs/embeddings.md). -(default: `1`) +[embeddings endpoint type](docs/embeddings.md) and +[rankings endpoint type](docs/rankings.md). +(default: `1`) ##### `--extra-inputs ` diff --git a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py index a64bbb089..de528aac4 100644 --- a/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py +++ b/src/c++/perf_analyzer/genai-perf/genai_perf/llm_inputs/llm_inputs.py @@ -139,7 +139,7 @@ def create_llm_inputs( output_tokens_deterministic: If true, the output tokens will set the minimum and maximum tokens to be equivalent. 
batch_size: - The number of inputs per request (currently only used for v1/embeddings) + The number of inputs per request (currently only used for the embeddings and rankings endpoints) Required Synthetic Prompt Generation Parameters ----------------------------------------------- @@ -236,7 +236,7 @@ def get_generic_dataset_json( num_of_output_prompts: The number of synthetic output prompts to generate batch_size: - The number of inputs per request (currently only used for v1/embeddings) + The number of inputs per request (currently only used for the embeddings and rankings endpoints) input_filename: The path to the input file containing the prompts in JSONL format. Returns