Skip to content

Commit

Permalink
add gpu memory benchmark
Browse files Browse the repository at this point in the history
  • Loading branch information
rui-ren committed Mar 26, 2024
1 parent 2e7ea85 commit 4b55600
Show file tree
Hide file tree
Showing 2 changed files with 69 additions and 0 deletions.
57 changes: 57 additions & 0 deletions benchmark/python/benchmark_e2e.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,41 @@
import time
import argparse
from tqdm import tqdm
import shutil

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'shutil' is not used.
import os

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'os' is not used.
import subprocess
import threading
import sys

Check notice

Code scanning / CodeQL

Unused import Note test

Import of 'sys' is not used.


# Shared state between the GPU-memory monitor thread and main().

# Highest single-GPU `memory.used` reading seen so far by monitor_gpu_memory().
# main() divides this by 1024 and prints it as GiB, so the value is presumably
# in MiB (nvidia-smi's default unit) -- TODO confirm.
peak_memory = 0.0
# Held by monitor_gpu_memory() while it compares against and updates peak_memory.
peak_memory_lock = threading.Lock()
# One entry per successful sample; each entry is the list of per-GPU readings
# parsed from that nvidia-smi call.
gpu_memory_data = []
# main() sets this to True to make the monitor loop exit before joining the thread.
stop_monitoring = False


# Monitor the GPU memory usage
def _parse_memory_used(csv_lines):
    """Extract per-GPU memory readings from ``nvidia-smi`` CSV output lines.

    Args:
        csv_lines: Lines produced by
            ``nvidia-smi --query-gpu=memory.used --format=csv``. The first
            line is treated as the CSV header and skipped.

    Returns:
        A list of floats, one per data row whose first whitespace-separated
        token is numeric (e.g. ``"123 MiB"`` -> ``123.0``). Blank rows and
        rows that do not start with a number (e.g. ``"[N/A]"``) are skipped
        instead of raising, so one bad row cannot kill the monitor thread.
    """
    readings = []
    for row in csv_lines[1:]:
        fields = row.split()
        if not fields:
            continue
        try:
            readings.append(float(fields[0]))
        except ValueError:
            # Non-numeric placeholder reported by the driver; ignore this row.
            continue
    return readings


def monitor_gpu_memory():
    """Poll ``nvidia-smi`` until ``stop_monitoring`` is set, tracking the peak.

    Intended to run as the target of a background thread. Every iteration
    queries ``memory.used`` for all GPUs, appends the per-GPU readings to the
    module-level ``gpu_memory_data`` list and, under ``peak_memory_lock``,
    raises the module-level ``peak_memory`` to the largest single-GPU reading
    seen so far. The loop sleeps 0.1 s between samples and exits once the
    module-level ``stop_monitoring`` flag becomes true.

    When a sample yields no usable readings, "No GPU Memory Info Found" is
    printed and polling continues.
    """
    global peak_memory, peak_gpus_memory, gpu_memory_data
    # Per-GPU readings captured at the moment the peak was last raised.
    # NOTE(review): nothing in the visible part of this file reads this global
    # (CodeQL flags it as unused); it is kept so external readers do not break.
    peak_gpus_memory = []

    while not stop_monitoring:
        result = subprocess.run(
            ['nvidia-smi', '--query-gpu=memory.used', '--format=csv'],
            capture_output=True,
            text=True,
        )
        gpu_memory = _parse_memory_used(result.stdout.splitlines())

        if gpu_memory:
            gpu_memory_data.append(gpu_memory)
            current_peak = max(gpu_memory)
            with peak_memory_lock:
                if current_peak > peak_memory:
                    peak_gpus_memory = gpu_memory
                    peak_memory = current_peak
        else:
            print("No GPU Memory Info Found")
        time.sleep(0.1)


# Use input model to generate prompt
def generate_prompt(model, tokenizer, prompt_length) -> str:
Expand Down Expand Up @@ -50,6 +85,21 @@ def save_results(results, filename):
print(f"Results saved in {filename}!")

def main(args):
global stop_monitoring

try:
subprocess.run(["nvidia-smi"], check=True)
IS_NVIDIA_SYSTEM = True
except Exception:
IS_NVIDIA_SYSTEM = False

if IS_NVIDIA_SYSTEM:
monitor_thread = threading.Thread(target=monitor_gpu_memory)
else:
# cpu monitor thread
pass


# Get user arguments
num_repetitions = args.repetitions
batch_size, prompt_length, generation_length = args.batch_size, args.prompt_length, args.generation_length
Expand All @@ -62,6 +112,8 @@ def main(args):
if args.verbose: print("Model loaded")
tokenizer = og.Tokenizer(model)

monitor_thread.start()

Check failure

Code scanning / CodeQL

Potentially uninitialized local variable Error test

Local variable 'monitor_thread' may be used before it is initialized.

# Generate prompt
prompt = [generate_prompt(model, tokenizer, prompt_length)] * batch_size
tokens = tokenizer.encode_batch(prompt)
Expand Down Expand Up @@ -130,6 +182,11 @@ def main(args):
wall_clock_times.append(wall_clock_end_time - wall_clock_start_time)
if args.print_model_output: print(tokenizer.decode(generator.get_sequence(0)))

stop_monitoring = True
monitor_thread.join()

print(f"************** Peak GPU Memory Usage: {peak_memory / 1024} GiB ********************")

# Calculate tokenization metrics
avg_tokenization_latency_s = sum(tokenize_times) / len(tokenize_times)
avg_tokenization_latency_ms = avg_tokenization_latency_s * 1000
Expand Down
12 changes: 12 additions & 0 deletions benchmark/python/output
Original file line number Diff line number Diff line change
@@ -0,0 +1,12 @@
Batch Size,1.0
Prompt Length,128.0
Tokenization Throughput (tps),2683.59902623915
Tokenization Latency (ms),0.37263391073793173
Prompt Processing Throughput (tps),14600.297229003849
Prompt Processing Latency (ms),0.06849175631941762
Token Generation Throughput (tps),257.5347160030977
Token Generation Latency (ms),3.882971645609021
Sampling Throughput (tps),47263.26307837957
Sampling Latency (ms),0.021158082088865483
Wall Clock Throughput (tps),364.75685284405847
Wall Clock Time (s),1.052756094932556

0 comments on commit 4b55600

Please sign in to comment.