tatsu-lab · YannDubs · Nov 11, 2024 · Sep 9, 2024 · Oct 13, 2024
diff --git a/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md b/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md
@@ -0,0 +1,28 @@
+# Annotator Served by vLLM
+
+This config demonstrates how to use an annotator served by vLLM. This brings several advantages:
+- Lets users run a "weighted"-style annotator, similar to `weighted_alpaca_eval_gpt4_turbo`;
+- One vLLM server can serve multiple nodes in a cluster environment;
+- Easy setup using vLLM's OpenAI-compatible API.
+
+## Setup
+1. Start the vLLM Server:
+
+    ```bash
+    vllm serve /home/shared/Meta-Llama-3-70B-Instruct --dtype auto --api-key token-abc123
+    ```
+
+2. Create the client config `local_configs.yaml` in the `client_configs` folder:
+
+    ```yaml
+    default:
+        - api_key: "token-abc123"
+          base_url: "http://localhost:8000/v1"
+    ```
+
+3. Run evaluation: 
+
+    ```bash
+    export OPENAI_CLIENT_CONFIG_PATH=<path to local_configs.yaml>
+    alpaca_eval evaluate --model_outputs 'example/outputs.json' --annotators_config weighted_alpaca_eval_vllm_llama3_70b
+    ```
diff --git a/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml b/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml
@@ -0,0 +1,17 @@
+weighted_alpaca_eval_vllm_llama3_70b:  # annotator served by vLLM; setup steps are in the sibling README.md
+  prompt_template: "alpaca_eval_clf_gpt4_turbo/alpaca_eval_clf.txt"
+  fn_completions: "openai_completions"  # vLLM is queried through its OpenAI-compatible API
+  completions_kwargs:
+    model_name: "/home/shared/Meta-Llama-3-70B-Instruct"  # TODO: replace with your model path (README passes the same path to `vllm serve`)
+    max_tokens: 1  # a single generated token; presumably its logprobs are what the parser reads - confirm
+    temperature: 1  # presumably no effect on the returned logprobs (temperature only matters when sampling) - TODO confirm for vLLM
+    logprobs: true
+    top_logprobs: 5  # number of top-token log-probabilities requested per position
+    requires_chatml: true  # NOTE(review): presumably forces the chat-style request path - confirm in openai_completions
+  fn_completion_parser: "logprob_parser"
+  completion_parser_kwargs:
+    numerator_token: "m"  # presumably preference = P("m") / (P("m") + P("M")) - verify against logprob_parser
+    denominator_tokens: ["m", "M"]
+    is_binarize: false  # presumably keeps the continuous ("weighted") score instead of a 0/1 decision - confirm
+  completion_key: "completions_all"
+  batch_size: 1