This repository has been archived by the owner on Oct 11, 2024. It is now read-only.

build(docker): bump ollama to 0.1.32 (#156)
* build(deps): bump ollama to 0.1.32

* docs(benchmark): update latency benchmark

* docs(env): update env.example

* build(docker): bump ollama in latency eval orchestration
frgfm authored Apr 29, 2024
1 parent a707f06 commit 4d359d8
Showing 7 changed files with 14 additions and 5 deletions.
2 changes: 1 addition & 1 deletion .env.example
@@ -17,7 +17,7 @@ GF_ADMIN_USER='quackadmin'
 GF_ADMIN_PWD='LetsGetQuacking!'
 
 # Optional variables
-OLLAMA_MODEL='dolphin-mistral:7b-v2.6-dpo-laser-q4_K_M'
+OLLAMA_MODEL='codeqwen:7b-chat-v1.5-q4_1'
 # Smaller option
 # OLLAMA_MODEL='tinydolphin:1.1b-v2.8-q4_K_M'
 OLLAMA_TIMEOUT=120
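The application reads these variables at startup; a minimal sketch of how the new default might be consumed (the variable names come from the file above, while the fallback logic is an illustrative assumption, not the project's actual loading code):

```python
import os

# Variable names from .env.example; fallbacks mirror the defaults set there
ollama_model = os.environ.get("OLLAMA_MODEL", "codeqwen:7b-chat-v1.5-q4_1")
ollama_timeout = int(os.environ.get("OLLAMA_TIMEOUT", "120"))  # seconds

print(f"Serving {ollama_model} with a {ollama_timeout}s timeout")
```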
2 changes: 1 addition & 1 deletion docker-compose.dev.yml
@@ -17,7 +17,7 @@ services:
       retries: 3
 
   ollama:
-    image: ollama/ollama:0.1.29
+    image: ollama/ollama:0.1.32
     expose:
       - 11434
     volumes:
2 changes: 1 addition & 1 deletion docker-compose.prod.yml
@@ -39,7 +39,7 @@ services:
     restart: always
 
   ollama:
-    image: ollama/ollama:0.1.29
+    image: ollama/ollama:0.1.32
     expose:
       - 11434
     volumes:
2 changes: 1 addition & 1 deletion docker-compose.yml
@@ -19,7 +19,7 @@ services:
       retries: 3
 
   ollama:
-    image: ollama/ollama:0.1.29
+    image: ollama/ollama:0.1.32
     expose:
       - 11434
     volumes:
6 changes: 6 additions & 0 deletions docs/developers/gpu-selection.mdx
@@ -21,6 +21,9 @@ The results are available for the following LLMs (cf. [Ollama hub](https://ollam
 - Deepseek Coder 6.7b - instruct ([Ollama](https://ollama.com/library/deepseek-coder), [HuggingFace](https://huggingface.co/deepseek-ai/deepseek-coder-6.7b-instruct))
 - OpenCodeInterpreter 6.7b ([Ollama](https://ollama.com/pxlksr/opencodeinterpreter-ds), [HuggingFace](https://huggingface.co/m-a-p/OpenCodeInterpreter-DS-6.7B), [paper](https://arxiv.org/abs/2402.14658))
 - Dolphin Mistral 7b ([Ollama](https://ollama.com/library/dolphin-mistral), [HuggingFace](https://huggingface.co/cognitivecomputations/dolphin-2.6-mistral-7b-dpo-laser), [paper](https://arxiv.org/abs/2310.06825))
+- CodeQwen 1.5 7b ([Ollama](https://ollama.com/library/codeqwen), [HuggingFace](https://huggingface.co/Qwen/CodeQwen1.5-7B-Chat-GGUF), [blog](https://qwenlm.github.io/blog/codeqwen1.5/))
+- Llama 3 8b ([Ollama](https://ollama.com/library/dolphin-llama3), [HuggingFace](https://huggingface.co/cognitivecomputations/dolphin-2.9-llama3-8b-gguf), [blog](https://ai.meta.com/blog/meta-llama-3/))
+- Phi 3 3.8b ([Ollama](https://ollama.com/library/phi3), [HuggingFace](https://huggingface.co/microsoft/Phi-3-mini-4k-instruct-gguf), [paper](https://arxiv.org/abs/2404.14219))
 - Coming soon: StarChat v2 ([HuggingFace](https://huggingface.co/HuggingFaceH4/starchat2-15b-v0.1), [paper](https://arxiv.org/abs/2402.19173))
 
 and the following quantization formats: `q3_K_M`, `q4_K_M`, `q5_K_M`.
@@ -37,6 +40,9 @@ and the following quantization formats: `q3_K_M`, `q4_K_M`, `q5_K_M`.
 | [pxlksr/opencodeinterpreter-ds:6.7b-Q4_K_M](https://ollama.com/library/pxlksr/opencodeinterpreter-ds:6.7b-Q4_K_M) | 78.94 tok/s (±10.2) | 37.95 tok/s (±1.65) |
 | [dolphin-mistral:7b-v2.6-dpo-laser-q4_K_M](https://ollama.com/library/dolphin-mistral:7b-v2.6-dpo-laser-q4_K_M) | 126.75 tok/s (±31.5) | 50.05 tok/s (±0.84) |
 | [dolphin-mistral:7b-v2.6-dpo-laser-q3_K_M](https://ollama.com/library/dolphin-mistral:7b-v2.6-dpo-laser-q3_K_M) | 89.47 tok/s (±29.91) | 47.09 tok/s (±0.67) |
+| [codeqwen:7b-chat-v1.5-q4_1](https://ollama.com/library/codeqwen:7b-chat-v1.5-q4_1) | 171.72 tok/s (±53.37) | 54.74 tok/s (±0.82) |
+| [dolphin-llama3:8b-v2.9-q4_K_M](https://ollama.com/library/dolphin-llama3:8b-v2.9-q4_K_M) | 131.89 tok/s (±33.37) | 50.81 tok/s (±0.66) |
+| [phi3:3.8b-mini-instruct-4k-q4_K_M](https://ollama.com/library/phi3:3.8b-mini-instruct-4k-q4_K_M) | 271.40 tok/s (±52.48) | 88.43 tok/s (±13.22) |
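These numbers are backed by `scripts/ollama/benchmark_result.csv`, which this commit also updates. As a quick sanity check on the new rows, the fastest generator can be picked with the standard library; a sketch, with the three added rows inlined and the column order (model, hardware, ingestion mean, ingestion std, generation mean, generation std) inferred from the CSV:

```python
import csv
import io

# Rows copied from scripts/ollama/benchmark_result.csv (column order inferred:
# model, hardware, ingestion mean, ingestion std, generation mean, generation std)
rows = """codeqwen:7b-chat-v1.5-q4_1,NVIDIA RTX 3060 (laptop),171.72,53.37,54.74,0.82
dolphin-llama3:8b-v2.9-q4_K_M,NVIDIA RTX 3060 (laptop),131.89,33.37,50.81,0.66
phi3:3.8b-mini-instruct-4k-q4_K_M,NVIDIA RTX 3060 (laptop),271.4,52.48,88.43,13.22"""

records = list(csv.reader(io.StringIO(rows)))
# Sort by mean generation speed (tok/s), fastest first
records.sort(key=lambda r: float(r[4]), reverse=True)
print(records[0][0])  # phi3:3.8b-mini-instruct-4k-q4_K_M
```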

### NVIDIA GeForce RTX 3070 (Scaleway GPU-3070-S)

3 changes: 3 additions & 0 deletions scripts/ollama/benchmark_result.csv
@@ -5,6 +5,9 @@ deepseek-coder:6.7b-instruct-q3_K_M,NVIDIA RTX 3060 (laptop),90.1,32.43,50.34,1.
 pxlksr/opencodeinterpreter-ds:6.7b-Q4_K_M,NVIDIA RTX 3060 (laptop),78.94,10.2,37.95,1.65
 dolphin-mistral:7b-v2.6-dpo-laser-q4_K_M,NVIDIA RTX 3060 (laptop),126.75,31.5,50.05,0.84
 dolphin-mistral:7b-v2.6-dpo-laser-q3_K_M,NVIDIA RTX 3060 (laptop),89.47,29.91,47.09,0.67
+codeqwen:7b-chat-v1.5-q4_1,NVIDIA RTX 3060 (laptop),171.72,53.37,54.74,0.82
+dolphin-llama3:8b-v2.9-q4_K_M,NVIDIA RTX 3060 (laptop),131.89,33.37,50.81,0.66
+phi3:3.8b-mini-instruct-4k-q4_K_M,NVIDIA RTX 3060 (laptop),271.4,52.48,88.43,13.22
 deepseek-coder:6.7b-instruct-q4_K_M,NVIDIA RTX 3070 (Scaleway GPU-3070-S),266.98,95.63,75.53,1.56
 deepseek-coder:6.7b-instruct-q3_K_M,NVIDIA RTX 3070 (Scaleway GPU-3070-S),141.43,50.4,73.69,1.61
 pxlksr/opencodeinterpreter-ds:6.7b-Q4_K_M,NVIDIA RTX 3070 (Scaleway GPU-3070-S),285.81,73.55,75.14,3.13
2 changes: 1 addition & 1 deletion scripts/ollama/docker-compose.yml
@@ -2,7 +2,7 @@ version: '3.7'
 
 services:
   ollama:
-    image: ollama/ollama:0.1.29
+    image: ollama/ollama:0.1.32
     ports:
       - "11434:11434"
     volumes:
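After bumping the pinned image across the compose files, it is worth confirming that the server that actually comes up reports the expected version. A sketch under stated assumptions: the commented curl call and the `{"version":"..."}` shape assume Ollama's `/api/version` endpoint, and the parsing below runs on a canned response rather than a live server:

```shell
# Against a live stack: `docker compose up -d ollama`, then
#   curl -s http://localhost:11434/api/version
# Canned response in the assumed {"version":"..."} shape:
response='{"version":"0.1.32"}'
# Extract the version field without a jq dependency
version=$(echo "$response" | sed -E 's/.*"version":"([^"]+)".*/\1/')
echo "$version"  # 0.1.32
```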
