From 3c107a1d16609b381c32a8317f9a34205af627ce Mon Sep 17 00:00:00 2001
From: cameron-chen
Date: Mon, 9 Sep 2024 09:26:38 +0800
Subject: [PATCH 1/2] add example for Llama3 vllm server

---
 .../configs.yaml | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)
 create mode 100644 src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml

diff --git a/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml b/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml
new file mode 100644
index 00000000..11e20754
--- /dev/null
+++ b/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/configs.yaml
@@ -0,0 +1,17 @@
+weighted_alpaca_eval_vllm_llama3_70b:
+  prompt_template: "alpaca_eval_clf_gpt4_turbo/alpaca_eval_clf.txt"
+  fn_completions: "openai_completions"
+  completions_kwargs:
+    model_name: "/home/shared/Meta-Llama-3-70B-Instruct" # TODO: replace with the path to your model
+    max_tokens: 1
+    temperature: 1 # temperature only affects sampling, so it has no effect on the returned logprobs
+    logprobs: true
+    top_logprobs: 5
+    requires_chatml: true
+  fn_completion_parser: "logprob_parser"
+  completion_parser_kwargs:
+    numerator_token: "m"
+    denominator_tokens: ["m", "M"]
+    is_binarize: false
+  completion_key: "completions_all"
+  batch_size: 1

From 5a3d4a6d6499e5990b2944c54194bc78dbfe4db5 Mon Sep 17 00:00:00 2001
From: cameron-chen
Date: Sun, 13 Oct 2024 14:24:28 +0000
Subject: [PATCH 2/2] add readme

---
 .../README.md | 28 +++++++++++++++++++
 1 file changed, 28 insertions(+)
 create mode 100644 src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md

diff --git a/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md b/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md
new file mode 100644
index 00000000..365627f8
--- /dev/null
+++ b/src/alpaca_eval/evaluators_configs/weighted_alpaca_eval_vllm_llama3_70b/README.md
@@ -0,0 +1,28 @@
+# Annotator Served by vLLM
+
+This config demonstrates how to use an annotator served by vLLM, which brings several advantages:
+- Allows a "weighted"-style annotator, similar to `weighted_alpaca_eval_gpt4_turbo`;
+- A single vLLM server can serve multiple nodes in a cluster environment;
+- Easy setup via vLLM's OpenAI-compatible API.
+
+## Setup
+1. Start the vLLM server:
+
+   ```bash
+   vllm serve /home/shared/Meta-Llama-3-70B-Instruct --dtype auto --api-key token-abc123
+   ```
+
+2. Create the client config `local_configs.yaml` in the `client_configs` folder:
+
+   ```yaml
+   default:
+     - api_key: "token-abc123"
+       base_url: "http://localhost:8000/v1"
+   ```
+
+3. Run the evaluation:
+
+   ```bash
+   export OPENAI_CLIENT_CONFIG_PATH=<path to local_configs.yaml>
+   alpaca_eval evaluate --model_outputs 'example/outputs.json' --annotators_config weighted_alpaca_eval_vllm_llama3_70b
+   ```
\ No newline at end of file
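
For intuition on the `logprob_parser` settings in `configs.yaml`: a "weighted" annotator does not take the sampled token as a hard label; it renormalizes the probability mass on the label tokens (`numerator_token` over `denominator_tokens`) into a continuous preference. A minimal sketch of that computation, assuming the parser receives a token-to-logprob map from the `top_logprobs` field; the function name `parse_preference` and the 0.5 fallback are illustrative, not alpaca_eval's actual implementation:

```python
import math

def parse_preference(top_logprobs: dict[str, float],
                     numerator_token: str = "m",
                     denominator_tokens: tuple[str, ...] = ("m", "M")) -> float:
    """Return P(numerator_token) renormalized over denominator_tokens."""
    # Convert logprobs to probabilities, keeping only the label tokens.
    probs = {tok: math.exp(lp) for tok, lp in top_logprobs.items()
             if tok in denominator_tokens}
    denom = sum(probs.values())
    if denom == 0.0:
        return 0.5  # neither label token appeared in the top logprobs
    return probs.get(numerator_token, 0.0) / denom

# Example: the model puts logprob -0.1 on "m" and -2.4 on "M".
print(parse_preference({"m": -0.1, "M": -2.4}))  # ~0.91, a soft preference
```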
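Before running step 3 of the README, it can help to confirm that the server from step 1 actually returns the `top_logprobs` the parser needs. A minimal sketch using the `openai` Python client (v1+), assuming the server is reachable at `http://localhost:8000/v1` with the API key from step 1; the prompt text is illustrative:

```python
from openai import OpenAI

# Point the OpenAI client at the local vLLM server.
client = OpenAI(base_url="http://localhost:8000/v1", api_key="token-abc123")

resp = client.chat.completions.create(
    model="/home/shared/Meta-Llama-3-70B-Instruct",  # same path passed to `vllm serve`
    messages=[{"role": "user", "content": "Reply with a single letter: m or M."}],
    max_tokens=1,
    logprobs=True,
    top_logprobs=5,
)

# Each entry pairs a candidate first token with its log-probability.
for cand in resp.choices[0].logprobs.content[0].top_logprobs:
    print(cand.token, cand.logprob)
```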