Add Qwen handler and fix mean_latency calculation error for OSS models #642

Open · wants to merge 4 commits into main
@@ -568,7 +568,31 @@
"https://huggingface.co/MadeAgents/Hammer-7b",
"MadeAgents",
"cc-by-nc-4.0",
]
],
"Qwen/Qwen2-1.5B-Instruct": [
"Qwen2-1.5B-Instruct",
"https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
"Qwen",
"apache-2.0",
],
"Qwen/Qwen2-7B-Instruct": [
"Qwen2-7B-Instruct",
"https://huggingface.co/Qwen/Qwen2-7B-Instruct",
"Qwen",
"apache-2.0",
],
"Qwen/Qwen2.5-1.5B-Instruct": [
"Qwen2.5-1.5B-Instruct",
"https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
"Qwen",
"apache-2.0",
],
"Qwen/Qwen2.5-7B-Instruct": [
"Qwen2.5-7B-Instruct",
"https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
"Qwen",
"apache-2.0",
],
}

INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -698,6 +722,10 @@
"meta-llama/Meta-Llama-3-70B-Instruct": 307,
"gorilla-openfunctions-v2": 83,
"THUDM/glm-4-9b-chat": 223,
"Qwen/Qwen2-1.5B-Instruct": 100,
"Qwen/Qwen2-7B-Instruct": 100,
"Qwen/Qwen2.5-1.5B-Instruct": 100,
"Qwen/Qwen2.5-7B-Instruct": 100,
}
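
For context, each OSS_LATENCY value appears to be an aggregate latency figure recorded for an entire local evaluation run, which the leaderboard later divides by the number of evaluated entries to report a mean; the 100 assigned to the new Qwen models reads as a placeholder total rather than a per-request latency. A minimal arithmetic sketch, with an illustrative entry count:

```python
# Illustrative only: 100 is the aggregate OSS_LATENCY figure for a model;
# 2000 stands in for the actual number of evaluated entries (total_count).
aggregate_latency = 100
total_count = 2000
mean_latency = aggregate_latency / total_count  # 0.05 (per-entry mean)
```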


@@ -1022,7 +1050,7 @@ def record_cost_latency(leaderboard_table, model_name, model_output_data):
         leaderboard_table[model_name]["latency"]["data"].extend(latency)
 
 
-def get_cost_letency_info(model_name, cost_data, latency_data):
+def get_cost_letency_info(model_name, cost_data, latency_data, total_count):
 
     cost, mean_latency, std_latency, percentile_95_latency = "N/A", "N/A", "N/A", "N/A"
 
@@ -1042,7 +1070,7 @@ def get_cost_letency_info(model_name, cost_data, latency_data):

     if model_name in OSS_LATENCY:
         mean_latency, std_latency, percentile_95_latency = (
-            OSS_LATENCY[model_name] / 1700,
+            OSS_LATENCY[model_name] / total_count,
             "N/A",
             "N/A",
         )
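
This is the mean_latency fix named in the PR title: the old code divided the aggregate OSS latency by a hardcoded 1700, which is only correct for a run of exactly 1,700 entries; as soon as the benchmark's entry count changes, every OSS model's reported mean latency is silently mis-scaled. Dividing by the caller-supplied total_count keeps the mean tied to the actual run. A hedged before/after sketch with illustrative numbers:

```python
# Values are illustrative; 223 mirrors the glm-4-9b-chat entry above,
# and 2230 is a hypothetical real entry count for the run.
aggregate = 223
old_mean = aggregate / 1700   # ~0.131, correct only for a 1700-entry run
new_mean = aggregate / 2230   # 0.1, reflects the actual total_count
```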
@@ -1077,11 +1105,13 @@ def generate_leaderboard_csv(
     data_combined = []
     for model_name, value in leaderboard_table.items():
         model_name_escaped = model_name.replace("_", "/")
 
+        total_count = 0
+        for _, v in value.items():
+            total_count += v.get("total_count", 0)
         cost_data = value.get("cost", {"input_data": [], "output_data": []})
         latency_data = value.get("latency", {"data": []})
         cost, latency_mean, latency_std, percentile_95_latency = get_cost_letency_info(
-            model_name_escaped, cost_data, latency_data
+            model_name_escaped, cost_data, latency_data, total_count
         )
 
         # Non-Live Score
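
Here total_count is accumulated across the per-category stats stored for each model before being passed down. A minimal sketch of the assumed shape of `value` (category names and counts are hypothetical):

```python
# Assumed structure: leaderboard_table[model] maps test category -> stats,
# each stats dict carrying a "total_count" for that category.
value = {
    "simple": {"total_count": 400},     # hypothetical counts
    "parallel": {"total_count": 200},
}

total_count = 0
for _, v in value.items():
    total_count += v.get("total_count", 0)

print(total_count)  # 600 -> the divisor for the OSS mean latency above
```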
@@ -20,6 +20,7 @@
 from bfcl.model_handler.proprietary_model.yi import YiHandler
 from bfcl.model_handler.oss_model.salesforce import SalesforceHandler
 from bfcl.model_handler.oss_model.hammer import HammerHandler
+from bfcl.model_handler.oss_model.qwen import QwenHandler
 
 handler_map = {
     "gorilla-openfunctions-v0": GorillaHandler,
@@ -102,5 +103,9 @@
"Salesforce/xLAM-7b-r": SalesforceHandler,
"Salesforce/xLAM-8x7b-r": SalesforceHandler,
"Salesforce/xLAM-8x22b-r": SalesforceHandler,
"MadeAgents/Hammer-7b": HammerHandler
"MadeAgents/Hammer-7b": HammerHandler,
"Qwen/Qwen2-1.5B-Instruct": QwenHandler,
"Qwen/Qwen2-7B-Instruct": QwenHandler,
"Qwen/Qwen2.5-1.5B-Instruct": QwenHandler,
"Qwen/Qwen2.5-7B-Instruct": QwenHandler,
}
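
Registering the four Qwen checkpoints here is what makes them selectable; the runner presumably resolves the requested model name through handler_map and instantiates the matching class. A hedged sketch of that lookup (the actual call site is not part of this diff):

```python
# Hypothetical dispatch, mirroring how a registry dict like this is
# normally consumed; constructor defaults come from QwenHandler.__init__.
model_name = "Qwen/Qwen2.5-7B-Instruct"
handler_cls = handler_map[model_name]   # -> QwenHandler
handler = handler_cls(model_name)       # temperature=0.001, top_p=1, max_tokens=1000
```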
bfcl/model_handler/oss_model/qwen.py (new file)
@@ -0,0 +1,20 @@
+from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
+
+class QwenHandler(OSSHandler):
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
+        super().__init__(model_name, temperature, top_p, max_tokens)
+
+    def apply_chat_template(self, prompts, function, test_category):
+        formatted_prompt = ""
+        for prompt in prompts:
+            formatted_prompt += f"<|im_start|>{prompt['role']}\n{prompt['content']}<|im_end|>\n"
+        formatted_prompt += "<|im_start|>assistant\n"
+        return formatted_prompt
+
+    def inference(self, test_question, num_gpus, gpu_memory_utilization):
+        return super().inference(
+            test_question,
+            num_gpus,
+            gpu_memory_utilization,
+            format_prompt_func=self.apply_chat_template,
+        )
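
The handler's only real logic is apply_chat_template, which renders the conversation into Qwen's ChatML wire format before delegating generation to the shared OSSHandler.inference path. A quick illustration of the string it produces (assuming OSSHandler.__init__ merely stores its arguments, so construction here is cheap; the function and test_category parameters are unused by the formatting itself):

```python
prompts = [
    {"role": "system", "content": "You are a function-calling assistant."},
    {"role": "user", "content": "What is the weather in Berlin?"},
]
handler = QwenHandler("Qwen/Qwen2.5-7B-Instruct")
print(handler.apply_chat_template(prompts, function=None, test_category=None))
# <|im_start|>system
# You are a function-calling assistant.<|im_end|>
# <|im_start|>user
# What is the weather in Berlin?<|im_end|>
# <|im_start|>assistant
```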