Skip to content

Commit

Permalink
Add Llama3 configuration
Browse files Browse the repository at this point in the history
  • Loading branch information
pgmpablo157321 committed Nov 22, 2024
1 parent beaa790 commit e44c62a
Showing 1 changed file with 10 additions and 2 deletions.
12 changes: 10 additions & 2 deletions loadgen/mlperf.conf
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dlrm-v2.*.performance_sample_count_override = 204800
rnnt.*.performance_sample_count_override = 2513
gptj.*.performance_sample_count_override = 13368
llama2-70b.*.performance_sample_count_override = 24576
llama3-405b.*.performance_sample_count_override = 8312
stable-diffusion-xl.*.performance_sample_count_override = 5000
# Set to 0 to let the entire sample set be the performance sample set
3d-unet.*.performance_sample_count_override = 0
Expand Down Expand Up @@ -44,6 +45,7 @@ retinanet.MultiStream.target_latency = 528
gptj.*.sample_concatenate_permutation = 1
llama2-70b.*.sample_concatenate_permutation = 1
mixtral-8x7b.*.sample_concatenate_permutation = 1
llama3-405b.*.sample_concatenate_permutation = 1

*.Server.target_latency = 10
*.Server.target_latency_percentile = 99
Expand All @@ -57,13 +59,14 @@ dlrm-v2.Server.target_latency = 60
rnnt.Server.target_latency = 1000
gptj.Server.target_latency = 20000
stable-diffusion-xl.Server.target_latency = 20000
# Llama2-70b benchmarks measures token latencies
# Benchmarks that measure token latencies
llama2-70b.*.use_token_latencies = 1
mixtral-8x7b.*.use_token_latencies = 1
llama3-405b.*.use_token_latencies = 1
# gptj benchmark infers token latencies
gptj.*.infer_token_latencies = 1
gptj.*.token_latency_scaling_factor = 69
# Only ttft and tpot are tracked for the llama2-70b & mixtral-8x7B benchmark therefore target_latency = 0
# Only ttft and tpot are tracked for the llama2-70b, mixtral-8x7b & llama3-405b benchmarks; therefore target_latency = 0
llama2-70b.Server.target_latency = 0
llama2-70b.Server.ttft_latency = 2000
llama2-70b.Server.tpot_latency = 200
Expand All @@ -72,6 +75,10 @@ mixtral-8x7b.Server.target_latency = 0
mixtral-8x7b.Server.ttft_latency = 2000
mixtral-8x7b.Server.tpot_latency = 200

llama3-405b.Server.target_latency = 0
llama3-405b.Server.ttft_latency = 6000
llama3-405b.Server.tpot_latency = 175

*.Offline.target_latency_percentile = 90
*.Offline.min_duration = 600000

Expand All @@ -89,6 +96,7 @@ rnnt.Offline.min_query_count = 2513
3d-unet.Offline.min_query_count = 43
stable-diffusion-xl.Offline.min_query_count = 5000
llama2-70b.Offline.min_query_count = 24576
llama3-405b.Offline.min_query_count = 8312
mixtral-8x7b.Offline.min_query_count = 15000

# These fields should be defined and overridden by user.conf.
Expand Down

0 comments on commit e44c62a

Please sign in to comment.