From 57f9a85f18edef140fe1e43f6b13db021bad7ff6 Mon Sep 17 00:00:00 2001
From: Yingbei Tong <yingbei@acorn.io>
Date: Wed, 17 Jul 2024 22:50:17 +0000
Subject: [PATCH] add groq tool use models benchmark (#134)

---
 docs/docs/benchmark.mdx               |  2 ++
 docs/src/components/BenchmarkTable.js | 20 ++++++++++++++++++++
 2 files changed, 22 insertions(+)

diff --git a/docs/docs/benchmark.mdx b/docs/docs/benchmark.mdx
index e366193..6c92fdc 100644
--- a/docs/docs/benchmark.mdx
+++ b/docs/docs/benchmark.mdx
@@ -32,6 +32,8 @@ Some of the LLMs above require using custom libraries to post-process LLM genera
 
 `functionary-small-v2.5` and `functionary-medium-v3.0` models are tested using [MeetKai's functionary](https://github.com/MeetKai/functionary?tab=readme-ov-file#setup) with the vllm framework. For each model, we compared the results with functionary's `Grammar Sampling` feature enabled and disabled, taking the highest score from either configuration. The `functionary-small-v2.5` model achieved a higher score than the `functionary-medium-v3.0` model, primarily due to the medium model exhibiting more hallucinations in some of our more advanced test cases.
 
+`groq/Llama-3-Groq-8B-Tool-Use` and `groq/Llama-3-Groq-70B-Tool-Use` are tested using [Groq's API](https://console.groq.com/docs/tool-use).
+
 :::::
 
 ∔ `Nexusflow/NexusRaven-V2-13B` and `gorilla-llm/gorilla-openfunctions-v2` don't accept tool observations, the result of running a tool or function once the LLM calls it, so we appended the observation to the prompt.
\ No newline at end of file
diff --git a/docs/src/components/BenchmarkTable.js b/docs/src/components/BenchmarkTable.js
index 846c633..8d4c479 100644
--- a/docs/src/components/BenchmarkTable.js
+++ b/docs/src/components/BenchmarkTable.js
@@ -229,6 +229,26 @@ const data = [
         gsm8k: '66.11',
         math: '20.54',
         mtBench:'7.09',
+    },
+    {
+        model: 'groq/Llama-3-Groq-8B-Tool-Use',
+        params: 8.03,
+        functionCalling: '45.70%',
+        mmlu: '-',
+        gpqa: '-',
+        gsm8k: '-',
+        math: '-',
+        mtBench:'-',
+    },
+    {
+        model: 'groq/Llama-3-Groq-70B-Tool-Use',
+        params: 70.6,
+        functionCalling: '74.29%',
+        mmlu: '-',
+        gpqa: '-',
+        gsm8k: '-',
+        math: '-',
+        mtBench:'-',
     }
 ];