-
Notifications
You must be signed in to change notification settings - Fork 1
/
Copy pathgenerate.sh
44 lines (41 loc) · 2.46 KB
/
generate.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
#!/usr/bin/env bash
#
# generate.sh - export the Phi-3-mini-4k-instruct prefill and decode models
# and post-process each of them into a final INT4 ONNX file.
#
# Stages, in order. The helper scripts are not part of this file, so the
# descriptions below follow the log messages this script prints and the file
# names each following step consumes:
#   1. decoder-only.py  - export the prefill model, then the decode model
#                         (--decode), with the lengths configured below.
#   2. convert.py       - expected to write "<model>_ex.onnx".
#   3. opset_convert.py - expected to write "<model>_ex_v21.onnx" (-v 21).
#   4. quantize_int4.py - expected to write "<model>_ex_v21_INT4_QDQ.onnx".
#   5. pad_two.py       - expected to write "<model>_ex_v21_INT4_QDQ_final.onnx".
#   6. rename.py        - writes "<n>_<phase>_INT4_<len>_<cache>.onnx".
# Finally the renamed files are moved to ${temp_path} and ${model_path} is
# removed.
#
# Usage: run from the directory that contains the helper Python scripts; all
# script and model paths are relative to the current working directory.

# Fail fast: every later step deletes the previous step's intermediate files
# and the script ends with a recursive delete, so continuing after a failed
# step would destroy data that is needed to retry.
set -Eeuo pipefail
trap 'printf "ERROR: line %s: command failed: %s\n" "${LINENO}" "${BASH_COMMAND}" >&2' ERR

readonly max_input_len=128
readonly max_cache_len=256
readonly model_path="logs/models/Phi_3_mini_4k_instruct/"
readonly temp_path="logs/models/done/"
readonly -a model_list=(
  "Phi_3_mini_4k_instruct_decoder_1_prefill"
  "Phi_3_mini_4k_instruct_decoder_2_decode"
)

#######################################
# Export the prefill and the decode model with decoder-only.py.
# Globals:   max_input_len, max_cache_len (read)
# Arguments: none
# Outputs:   one "LOG:" line per exported model on stdout
#######################################
export_models() {
  python decoder-only.py -m microsoft/Phi-3-mini-4k-instruct \
    -l "${max_input_len}" -c "${max_cache_len}"
  echo 'LOG: Export prefill model finished!!!'

  python decoder-only.py -m microsoft/Phi-3-mini-4k-instruct \
    -l "${max_input_len}" -c "${max_cache_len}" --decode
  echo 'LOG: Export decode model finished!!!'
}

#######################################
# Run the convert -> opset -> quantize -> pad -> rename pipeline for one
# exported model, removing each intermediate once the next step has used it.
# Globals:   model_path, max_input_len, max_cache_len (read)
# Arguments: $1  - stage number shown in the log messages (1 or 2)
#            $2  - base name of the exported model (without ".onnx")
#            $3  - prefix of the final file name, e.g. "1_prefill"
#            $4+ - extra arguments forwarded to pad_two.py (e.g. --decode)
# Outputs:   "LOG <n>:" progress lines on stdout
#######################################
process_model() {
  local -r stage=$1
  local -r stem="${model_path}$2"
  local -r final="${model_path}$3_INT4_${max_input_len}_${max_cache_len}.onnx"
  shift 3

  python convert.py -path "${stem}.onnx"
  printf 'LOG %s: Generating model with external data finished.\n' "${stage}"

  python opset_convert.py -path "${stem}_ex.onnx" -v 21
  printf 'LOG %s: Generating model with new opset finished.\n' "${stage}"
  # The trailing glob also removes any sibling files sharing the prefix
  # (the original script used the same "<name>.onnx*" pattern).
  rm -f -- "${stem}_ex.onnx"*

  python quantize_int4.py -path "${stem}_ex_v21.onnx"
  printf 'LOG %s: Quantizing model finished.\n' "${stage}"
  rm -f -- "${stem}_ex_v21.onnx"*

  python pad_two.py -path "${stem}_ex_v21_INT4_QDQ.onnx" "$@"
  printf 'LOG %s: Aligning model output with input finished.\n' "${stage}"
  rm -f -- "${stem}_ex_v21_INT4_QDQ.onnx"*

  # NOTE(review): the meaning of "-s 20000000" is defined by rename.py, which
  # is not visible here; the value is kept exactly as in the original script.
  python rename.py -i "${stem}_ex_v21_INT4_QDQ_final.onnx" -o "${final}" -s 20000000
  rm -f -- "${stem}_ex_v21_INT4_QDQ_final.onnx"*
  printf 'LOG %s: Renaming model finished.\n' "${stage}"
}

#######################################
# Move the final model files into ${temp_path} and delete the working
# directory ${model_path}.
# Globals:   model_path, temp_path, max_input_len, max_cache_len (read)
# Arguments: none
#######################################
collect_outputs() {
  local -r suffix="_INT4_${max_input_len}_${max_cache_len}.onnx"

  # -p: do not fail when the destination already exists from an earlier run.
  mkdir -p -- "${temp_path}"
  mv -- "${model_path}1_prefill${suffix}"* "${temp_path}"
  mv -- "${model_path}2_decode${suffix}"* "${temp_path}"

  # ":?" aborts instead of expanding to an empty path in a recursive delete.
  rm -r -- "${model_path:?}"
}

main() {
  export_models
  process_model 1 "${model_list[0]}" "1_prefill"
  process_model 2 "${model_list[1]}" "2_decode" --decode
  collect_outputs
}

main "$@"