Update llama-inference.yaml

eole-nlp · Sep 20, 2024 · 2a67f28
1 parent c4255b0
commit 2a67f28
Showing 1 changed file with 4 additions and 14 deletions.
diff --git a/recipes/llama3/llama-inference.yaml b/recipes/llama3/llama-inference.yaml
@@ -1,13 +1,3 @@
-transforms: [onmt_tokenize]
-
-transforms_configs:
-  onmt_tokenize:
-    src_subword_type: bpe
-    src_subword_model: "${EOLE_MODEL_DIR}/llama3-8b-instruct/bpe.model"
-    tgt_subword_type: bpe
-    tgt_subword_model: "${EOLE_MODEL_DIR}/llama3-8b-instruct/bpe.model"
-    gpt2_pretok: true
-
 # Model info
 model_path: "${EOLE_MODEL_DIR}/llama3-8b-instruct"
 
@@ -24,10 +14,10 @@ gpu_ranks: [0]
 # parallel_mode: "tensor_parallel"
 quant_layers: ['gate_up_proj', 'down_proj', 'up_proj', 'linear_values', 'linear_query', 'linear_keys', 'final_linear']
 quant_type: "bnb_NF4"
-compute_dtype: fp16
-#random_sampling_topk: 1
-#random_sampling_topp: 0.0
-#random_sampling_temp: 0.9
+compute_dtype: bf16
+#top_k: 1
+#top_p: 0.0
+#temperature: 0.9
 beam_size: 1
 n_best: 1
 report_time: true