From ec8af40c6ae62a0d511d15db92d9f34e70c334b6 Mon Sep 17 00:00:00 2001
From: ai_user
Date: Wed, 18 Sep 2024 09:35:16 +0000
Subject: [PATCH 1/4] Fix the mean_latency calculation error in the OSS model

---
 .../bfcl/eval_checker/eval_runner_helper.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
index 3f812d47c..178a5da46 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
@@ -1022,7 +1022,7 @@ def record_cost_latency(leaderboard_table, model_name, model_output_data):
     leaderboard_table[model_name]["latency"]["data"].extend(latency)
 
 
-def get_cost_letency_info(model_name, cost_data, latency_data):
+def get_cost_letency_info(model_name, cost_data, latency_data, total_count):
 
     cost, mean_latency, std_latency, percentile_95_latency = "N/A", "N/A", "N/A", "N/A"
 
@@ -1042,7 +1042,7 @@ def get_cost_letency_info(model_name, cost_data, latency_data):
 
     if model_name in OSS_LATENCY:
         mean_latency, std_latency, percentile_95_latency = (
-            OSS_LATENCY[model_name] / 1700,
+            OSS_LATENCY[model_name] / total_count,
             "N/A",
             "N/A",
         )
@@ -1077,11 +1077,13 @@ def generate_leaderboard_csv(
     data_combined = []
     for model_name, value in leaderboard_table.items():
         model_name_escaped = model_name.replace("_", "/")
-
+        total_count = 0
+        for _, v in value.items():
+            total_count += v.get("total_count", 0)
         cost_data = value.get("cost", {"input_data": [], "output_data": []})
         latency_data = value.get("latency", {"data": []})
         cost, latency_mean, latency_std, percentile_95_latency = get_cost_letency_info(
-            model_name_escaped, cost_data, latency_data
+            model_name_escaped, cost_data, latency_data, total_count
         )
 
         # Non-Live Score

From c85d6b20bf71702890d167db3733afb163f6f683 Mon Sep 17 00:00:00 2001
From: ai_user
Date: Wed, 18 Sep 2024 10:13:02 +0000
Subject: [PATCH 2/4] Add Qwen handler

---
 .../bfcl/eval_checker/eval_runner_helper.py | 15 +++++++++++++-
 .../bfcl/model_handler/handler_map.py       |  6 ++++--
 .../bfcl/model_handler/oss_model/qwen.py    | 20 +++++++++++++++++++
 3 files changed, 38 insertions(+), 3 deletions(-)
 create mode 100644 berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py

diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
index 178a5da46..1ffc158de 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
@@ -568,7 +568,19 @@
         "https://huggingface.co/MadeAgents/Hammer-7b",
         "MadeAgents",
         "cc-by-nc-4.0",
-    ]
+    ],
+    "Qwen/Qwen2-1.5B-Instruct": [
+        "Qwen2-1.5B-Instruct",
+        "https://huggingface.co/Qwen/Qwen2-1.5B-Instruct",
+        "Qwen",
+        "apache-2.0",
+    ],
+    "Qwen/Qwen2-7B-Instruct": [
+        "Qwen2-7B-Instruct",
+        "https://huggingface.co/Qwen/Qwen2-7B-Instruct",
+        "Qwen",
+        "apache-2.0",
+    ],
 }
 
 INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -698,6 +710,7 @@
     "meta-llama/Meta-Llama-3-70B-Instruct": 307,
     "gorilla-openfunctions-v2": 83,
     "THUDM/glm-4-9b-chat": 223,
+    "Qwen/Qwen2-1.5B-Instruct": 40,
 }
 
 
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
index d583b3723..0820df5fe 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -20,7 +20,7 @@
 from bfcl.model_handler.proprietary_model.yi import YiHandler
 from bfcl.model_handler.oss_model.salesforce import SalesforceHandler
 from bfcl.model_handler.oss_model.hammer import HammerHandler
-
+from bfcl.model_handler.oss_model.qwen import QwenHandler
 handler_map = {
     "gorilla-openfunctions-v0": GorillaHandler,
     "gorilla-openfunctions-v2": GorillaHandler,
@@ -102,5 +102,7 @@
     "Salesforce/xLAM-7b-r": SalesforceHandler,
     "Salesforce/xLAM-8x7b-r": SalesforceHandler,
     "Salesforce/xLAM-8x22b-r": SalesforceHandler,
-    "MadeAgents/Hammer-7b": HammerHandler
+    "MadeAgents/Hammer-7b": HammerHandler,
+    "Qwen/Qwen2-1.5B-Instruct": QwenHandler,
+    "Qwen/Qwen2-7B-Instruct": QwenHandler,
 }
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
new file mode 100644
index 000000000..bcfbf6a13
--- /dev/null
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
@@ -0,0 +1,20 @@
+from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
+
+class QwenHandler(OSSHandler):
+    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+        super().__init__(model_name, temperature, top_p, max_tokens)
+
+    def apply_chat_template(self, prompts, function, test_category):
+        formatted_prompt = ""
+        for prompt in prompts:
+            formatted_prompt += f"<|im_start|>{prompt['role']}\n{prompt['content']}<|im_end|>\n"
+        formatted_prompt += "<|im_start|>assistant\n"
+        return formatted_prompt
+
+    def inference(self, test_question, num_gpus, gpu_memory_utilization):
+        return super().inference(
+            test_question,
+            num_gpus,
+            gpu_memory_utilization,
+            format_prompt_func=self.apply_chat_template,
+        )
\ No newline at end of file

From 2f6dd7109760d019e0b504cdb91c49cffb3fd1e7 Mon Sep 17 00:00:00 2001
From: ai_user
Date: Wed, 18 Sep 2024 11:13:05 +0000
Subject: [PATCH 3/4] update Qwen handler

---
 .../bfcl/eval_checker/eval_runner_helper.py | 3 ++-
 .../bfcl/model_handler/handler_map.py       | 1 +
 .../bfcl/model_handler/oss_model/qwen.py    | 2 +-
 3 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
index 1ffc158de..a32decb00 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
@@ -710,7 +710,8 @@
     "meta-llama/Meta-Llama-3-70B-Instruct": 307,
     "gorilla-openfunctions-v2": 83,
     "THUDM/glm-4-9b-chat": 223,
-    "Qwen/Qwen2-1.5B-Instruct": 40,
+    "Qwen/Qwen2-1.5B-Instruct": 100,
+    "Qwen/Qwen2-7B-Instruct": 100,
 }
 
 
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
index 0820df5fe..27d357146 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -21,6 +21,7 @@
 from bfcl.model_handler.oss_model.salesforce import SalesforceHandler
 from bfcl.model_handler.oss_model.hammer import HammerHandler
 from bfcl.model_handler.oss_model.qwen import QwenHandler
+
 handler_map = {
     "gorilla-openfunctions-v0": GorillaHandler,
     "gorilla-openfunctions-v2": GorillaHandler,
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
index bcfbf6a13..e251ffa29 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/oss_model/qwen.py
@@ -1,7 +1,7 @@
 from bfcl.model_handler.oss_model.base_oss_handler import OSSHandler
 
 class QwenHandler(OSSHandler):
-    def __init__(self, model_name, temperature=0.7, top_p=1, max_tokens=1000) -> None:
+    def __init__(self, model_name, temperature=0.001, top_p=1, max_tokens=1000) -> None:
         super().__init__(model_name, temperature, top_p, max_tokens)
 
     def apply_chat_template(self, prompts, function, test_category):

From 3736a89276cbd21cc44eef219a86f77b2e1008f2 Mon Sep 17 00:00:00 2001
From: ai_user
Date: Thu, 19 Sep 2024 03:01:37 +0000
Subject: [PATCH 4/4] update qwen2.5

---
 .../bfcl/eval_checker/eval_runner_helper.py | 14 ++++++++++++++
 .../bfcl/model_handler/handler_map.py       |  2 ++
 2 files changed, 16 insertions(+)

diff --git a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
index a32decb00..b28a50e89 100644
--- a/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
+++ b/berkeley-function-call-leaderboard/bfcl/eval_checker/eval_runner_helper.py
@@ -581,6 +581,18 @@
         "Qwen",
         "apache-2.0",
     ],
+    "Qwen/Qwen2.5-1.5B-Instruct": [
+        "Qwen2.5-1.5B-Instruct",
+        "https://huggingface.co/Qwen/Qwen2.5-1.5B-Instruct",
+        "Qwen",
+        "apache-2.0",
+    ],
+    "Qwen/Qwen2.5-7B-Instruct": [
+        "Qwen2.5-7B-Instruct",
+        "https://huggingface.co/Qwen/Qwen2.5-7B-Instruct",
+        "Qwen",
+        "apache-2.0",
+    ],
 }
 
 INPUT_PRICE_PER_MILLION_TOKEN = {
@@ -712,6 +724,8 @@
     "THUDM/glm-4-9b-chat": 223,
     "Qwen/Qwen2-1.5B-Instruct": 100,
     "Qwen/Qwen2-7B-Instruct": 100,
+    "Qwen/Qwen2.5-1.5B-Instruct": 100,
+    "Qwen/Qwen2.5-7B-Instruct": 100,
 }
 
 
diff --git a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
index 27d357146..9cd7a8a82 100644
--- a/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
+++ b/berkeley-function-call-leaderboard/bfcl/model_handler/handler_map.py
@@ -106,4 +106,6 @@
     "MadeAgents/Hammer-7b": HammerHandler,
     "Qwen/Qwen2-1.5B-Instruct": QwenHandler,
     "Qwen/Qwen2-7B-Instruct": QwenHandler,
+    "Qwen/Qwen2.5-1.5B-Instruct": QwenHandler,
+    "Qwen/Qwen2.5-7B-Instruct": QwenHandler,
 }