Skip to content

Commit

Permalink
Update llama-inference.yaml
Browse files Browse the repository at this point in the history
  • Loading branch information
vince62s authored Sep 20, 2024
1 parent c4255b0 commit 2a67f28
Showing 1 changed file with 4 additions and 14 deletions.
18 changes: 4 additions & 14 deletions recipes/llama3/llama-inference.yaml
Original file line number Diff line number Diff line change
@@ -1,13 +1,3 @@
-transforms: [onmt_tokenize]
-
-transforms_configs:
-  onmt_tokenize:
-    src_subword_type: bpe
-    src_subword_model: "${EOLE_MODEL_DIR}/llama3-8b-instruct/bpe.model"
-    tgt_subword_type: bpe
-    tgt_subword_model: "${EOLE_MODEL_DIR}/llama3-8b-instruct/bpe.model"
-    gpt2_pretok: true
-
 # Model info
 model_path: "${EOLE_MODEL_DIR}/llama3-8b-instruct"

@@ -24,10 +14,10 @@ gpu_ranks: [0]
 # parallel_mode: "tensor_parallel"
 quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 quant_type: "bnb_NF4"
-compute_dtype: fp16
-#random_sampling_topk: 1
-#random_sampling_topp: 0.0
-#random_sampling_temp: 0.9
+compute_dtype: bf16
+#top_k: 1
+#top_p: 0.0
+#temperature: 0.9
 beam_size: 1
 n_best: 1
 report_time: true
Expand Down

0 comments on commit 2a67f28

Please sign in to comment.