From 64c88bcb5aa979c557591cf446bd8771e773c1f6 Mon Sep 17 00:00:00 2001
From: F-G Fernandez <26927750+frgfm@users.noreply.github.com>
Date: Wed, 27 Mar 2024 17:17:41 +0100
Subject: [PATCH] docs(benchmark): add throughput evaluation for NVIDIA L4 GPUs
 (#138)

* docs(benchmark): add throughput for NVIDIA L4

* build(docker): update docker orchestration for ollama bench

* docs(readme): add GPU reference to benchmark readme
---
 scripts/ollama/README.md          | 1 +
 scripts/ollama/docker-compose.yml | 2 +-
 scripts/ollama/latency.csv        | 5 +++++
 3 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/scripts/ollama/README.md b/scripts/ollama/README.md
index f01bd9b..ca10d35 100644
--- a/scripts/ollama/README.md
+++ b/scripts/ollama/README.md
@@ -10,6 +10,7 @@ We ran our tests on the following hardware:
 - [NVIDIA GeForce RTX 3070](https://www.nvidia.com/fr-fr/geforce/graphics-cards/30-series/rtx-3070-3070ti/) ([Scaleway GPU-3070-S](https://www.scaleway.com/en/pricing/?tags=compute))
 - [NVIDIA A10](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) ([Lambda Cloud gpu_1x_a10](https://lambdalabs.com/service/gpu-cloud#pricing))
 - [NVIDIA A10G](https://www.nvidia.com/en-us/data-center/products/a10-gpu/) ([AWS g5.xlarge](https://aws.amazon.com/ec2/instance-types/g5/))
+- [NVIDIA L4](https://www.nvidia.com/en-us/data-center/l4/) ([Scaleway L4-1-24G](https://www.scaleway.com/en/pricing/?tags=compute))
 
 *The laptop hardware setup includes an [Intel(R) Core(TM) i7-12700H](https://ark.intel.com/content/www/us/en/ark/products/132228/intel-core-i7-12700h-processor-24m-cache-up-to-4-70-ghz.html) for the CPU*
 
diff --git a/scripts/ollama/docker-compose.yml b/scripts/ollama/docker-compose.yml
index 3d220a9..810e2b5 100644
--- a/scripts/ollama/docker-compose.yml
+++ b/scripts/ollama/docker-compose.yml
@@ -22,7 +22,7 @@ services:
       retries: 3
 
   evaluator:
-    image: quackai/evaluator:latest
+    image: quackai/llm-evaluator:latest
     build: .
     depends_on:
       ollama:
diff --git a/scripts/ollama/latency.csv b/scripts/ollama/latency.csv
index 4ffdf00..a373804 100644
--- a/scripts/ollama/latency.csv
+++ b/scripts/ollama/latency.csv
@@ -20,3 +20,8 @@ deepseek-coder:6.7b-instruct-q3_K_M,A10G (AWS g5.xlarge),99.83,35.41,84.47,1.69
 pxlksr/opencodeinterpreter-ds:6.7b-Q4_K_M,A10G (AWS g5.xlarge),212.08,86.58,79.02,3.35
 dolphin-mistral:7b-v2.6-dpo-laser-q4_K_M,A10G (AWS g5.xlarge),187.2,62.24,75.91,1
 dolphin-mistral:7b-v2.6-dpo-laser-q3_K_M,A10G (AWS g5.xlarge),102.36,34.29,81.23,1.02
+deepseek-coder:6.7b-instruct-q4_K_M,NVIDIA L4 (Scaleway L4-1-24G),213.46,76.24,49.97,1.01
+deepseek-coder:6.7b-instruct-q3_K_M,NVIDIA L4 (Scaleway L4-1-24G),118.87,43.35,54.72,1.31
+pxlksr/opencodeinterpreter-ds:6.7b-Q4_K_M,NVIDIA L4 (Scaleway L4-1-24G),225.62,60.21,49.39,1.9
+dolphin-mistral:7b-v2.6-dpo-laser-q4_K_M,NVIDIA L4 (Scaleway L4-1-24G),211.52,72.76,47.27,0.58
+dolphin-mistral:7b-v2.6-dpo-laser-q3_K_M,NVIDIA L4 (Scaleway L4-1-24G),120.13,41.09,51.9,0.71