-
Notifications
You must be signed in to change notification settings - Fork 9
/
Copy pathfinetune_jointgt_t5.sh
80 lines (76 loc) · 2.53 KB
/
finetune_jointgt_t5.sh
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
#!/bin/bash
# WebNLG run: invokes cli_gt.py in --do_train mode on data/webnlg and writes
# to out/jointgt_t5_webnlg. The flags are collected in an array so each one
# sits on its own line without backslash continuations; the array expands to
# exactly the same argument list, in the same order.
webnlg_args=(
  --do_train
  --model_name t5

  # Input / output locations (relative to the current working directory).
  --output_dir out/jointgt_t5_webnlg
  --train_file data/webnlg/train
  --predict_file data/webnlg/val
  --model_path pretrain_model/jointgt_t5
  --tokenizer_path pretrain_model/jointgt_t5
  --dataset webnlg

  # Batch sizes and sequence-length limits.
  --train_batch_size 24
  --predict_batch_size 24
  --max_input_length 256
  --max_output_length 128
  --append_another_bos

  # Remaining hyperparameters; their exact semantics are defined by cli_gt.py.
  --learning_rate 5e-5
  --num_train_epochs 30
  --warmup_steps 1600
  --eval_period 800
  --num_beams 5
)
# CUDA_VISIBLE_DEVICES is set for this one command only: GPUs 0 and 1.
CUDA_VISIBLE_DEVICES=0,1 python cli_gt.py "${webnlg_args[@]}"
# WebNLG (const split) run: invokes cli_gt.py in --do_train mode on
# data/webnlg_const and writes to out/jointgt_t5_webnlg_const. The flags are
# held in an array that expands to the same argument list, in the same order,
# as a single backslash-continued command line would.
webnlg_const_args=(
  --do_train
  --model_name t5

  # Input / output locations (relative to the current working directory).
  --output_dir out/jointgt_t5_webnlg_const
  --train_file data/webnlg_const/train
  --predict_file data/webnlg_const/dev
  --model_path pretrain_model/jointgt_t5
  --tokenizer_path pretrain_model/jointgt_t5
  --dataset webnlg

  # Batch sizes and sequence-length limits.
  --train_batch_size 32
  --predict_batch_size 32
  --max_input_length 256
  --max_output_length 128
  --append_another_bos

  # Remaining hyperparameters; their exact semantics are defined by cli_gt.py.
  --learning_rate 3e-5
  --num_train_epochs 30
  --warmup_steps 1200
  --eval_period 600
  --num_beams 5
  --clean_up_spaces
)
# CUDA_VISIBLE_DEVICES is set for this one command only: GPUs 0 and 1.
CUDA_VISIBLE_DEVICES=0,1 python cli_gt.py "${webnlg_const_args[@]}"
# WQ run: invokes cli_gt.py in --do_train mode on data/wq and writes to
# out/jointgt_t5_wq. The flags are held in an array that expands to the same
# argument list, in the same order, as a backslash-continued command line.
wq_args=(
  --do_train
  --model_name t5

  # Input / output locations (relative to the current working directory).
  --output_dir out/jointgt_t5_wq
  --train_file data/wq/train
  --predict_file data/wq/dev
  --model_path pretrain_model/jointgt_t5
  --tokenizer_path pretrain_model/jointgt_t5
  # NOTE(review): --dataset is "webnlg" even though the files come from
  # data/wq -- confirm against cli_gt.py that this is the intended value.
  --dataset webnlg

  # Batch sizes and sequence-length limits.
  --train_batch_size 32
  --predict_batch_size 32
  --max_input_length 256
  --max_output_length 64

  # Remaining hyperparameters; their exact semantics are defined by cli_gt.py.
  --learning_rate 1e-4
  --num_train_epochs 40
  --warmup_steps 2300
  --eval_period 600
  --num_beams 5
)
# CUDA_VISIBLE_DEVICES is set for this one command only: GPUs 0 and 1.
CUDA_VISIBLE_DEVICES=0,1 python cli_gt.py "${wq_args[@]}"
# PQ run: invokes cli_gt.py in --do_train mode on data/pq and writes to
# out/jointgt_t5_pq. The flags are held in an array that expands to the same
# argument list, in the same order, as a backslash-continued command line.
pq_args=(
  --do_train
  --model_name t5

  # Input / output locations (relative to the current working directory).
  --output_dir out/jointgt_t5_pq
  --train_file data/pq/train
  --predict_file data/pq/dev
  --model_path pretrain_model/jointgt_t5
  --tokenizer_path pretrain_model/jointgt_t5
  # NOTE(review): --dataset is "webnlg" even though the files come from
  # data/pq -- confirm against cli_gt.py that this is the intended value.
  --dataset webnlg

  # Batch sizes and sequence-length limits.
  --train_batch_size 32
  --predict_batch_size 32
  --max_input_length 128
  --max_output_length 64

  # Remaining hyperparameters; their exact semantics are defined by cli_gt.py.
  --learning_rate 2e-5
  --num_train_epochs 30
  --warmup_steps 900
  --eval_period 300
  --num_beams 5
)
# CUDA_VISIBLE_DEVICES is set for this one command only: GPUs 0 and 1.
# "python -u" runs the interpreter with unbuffered stdout/stderr.
CUDA_VISIBLE_DEVICES=0,1 python -u cli_gt.py "${pq_args[@]}"