#!/bin/bash
set -xue
# ___________________________________________________
# ________ Fine-tuned Student Model Training ________
# >> Training models with standard train/dev/test splits
# python3 main.py --dataset topv2 --task fine_tune --log-interval 600 --do-train --do-save \
# --model godel --size medium --learning-rate 3e-5 --source-max-len 256 --batch-size 14 \
# --domain reminder --threshold 1.2 --n-epochs 10 --quantify --metric accuracy --prune-keep 2
# python3 main.py --dataset crossner --task fine_tune --log-interval 200 --do-train --do-save \
# --n-epochs 10 --model godel --size small --learning-rate 1e-4 --quantify --batch-size 10 \
# --threshold 1.2 --source-max-len 256 --domain literature --verbose
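# >> Sketch: sweeping the CrossNER fine-tuning command above over several domains.
# The domain list below only reuses domains that appear elsewhere in this file; adjust it to your setup.
# for domain in literature politics music; do
#   python3 main.py --dataset crossner --task fine_tune --log-interval 200 --do-train --do-save \
#     --n-epochs 10 --model godel --size small --learning-rate 1e-4 --quantify --batch-size 10 \
#     --threshold 1.2 --source-max-len 256 --domain "$domain" --verbose
# done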
# >> Different dataset settings (full data, few shot, cross-domain, etc.)
# python3 main.py --dataset topv2 --task fine_tune --log-interval 500 --do-train --do-save \
# --n-epochs 14 --model godel --size small --learning-rate 1e-4 --quantify --domain reminder \
# --source-max-len 64 --threshold 1.2 --verbose --setting full --metric accuracy
# python3 main.py --dataset nlu++ --task fine_tune --log-interval 500 --do-train \
# --model godel --size small --learning-rate 1e-4 --setting cross --quantify \
# --source-max-len 256 --threshold 1.1 --ignore-cache --domain banking
# >> Training and evaluating end-to-end with synthesized data
# python3 main.py --dataset topv2 --task end_to_end --log-interval 600 --do-train --verbose \
# --n-epochs 7 --model godel --size medium --learning-rate 3e-5 --quantify --domain reminder \
# --source-max-len 128 --threshold 1.2 --method msp --mixture bottleneck --setting few_shot \
# --ignore-cache --metric accuracy
# python3 main.py --dataset crossner --task end_to_end --log-interval 500 --do-train --do-save \
# --n-epochs 7 --model godel --size medium --learning-rate 3e-5 --quantify --domain literature \
# --source-max-len 256 --threshold 1.2 --method msp --mixture pooling --setting full --verbose
# python3 main.py --dataset nlu++ --task end_to_end --do-eval --model godel --size medium \
# --domain hotels --quantify --batch-size 24 --threshold 1.2 --checkpoint acc650_lr0.0001_epoch7.pt
# _______________________________________________________
# _______ Mixing Soft Prompts w/ Frozen Large LM ______
# >> Soft Prompt Tuning, roughly 30% faster
# python3 main.py --dataset nlu++ --task soft_prompt --do-train --n-tokens 100 --domain hotels \
# --model godel --size large --source-max-len 128 --quantify --qualify --log-interval 100 \
# --n-epochs 14 --learning-rate 0.1 --batch-size 8 --setting cross --verbose
# accelerate launch main.py --dataset crossner --task soft_prompt --do-train --n-tokens 100 \
# --model gpt --size giant --source-max-len 256 --quantify --setting full --log-interval 140 \
# --n-epochs 10 --learning-rate 1e-4 --domain literature --batch-size 4 --accelerate \
# --grad-accum-steps 8 --target-max-len 100 --verbose
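# Note: `accelerate launch` assumes the standard HuggingFace accelerate CLI is set up on this machine;
# if it is not, run `accelerate config` once before using the command above.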
# python3 main.py --dataset actdial --task soft_prompt --do-train --n-tokens 100 \
# --model gpt --size medium --source-max-len 256 --target-max-len 64 --quantify \
# --n-epochs 10 --learning-rate 3e-3 --batch-size 7 --warmup-steps 0.05 --do-save
# >> In Context Learning, Inference Only
# python main.py --dataset nlu++ --task in_context --n-tokens 100 --batch-size 16 \
# --model t5 --size medium --source-max-len 64 --quantify --log-interval 300
# python main.py --dataset nlu++ --task in_context --do-train --n-tokens 100 --setting cross \
# --model gpt --size giant --source-max-len 64 --quantify --batch-size 4 --domain hotels
# >> In context learning, OpenAI GPT
# base
# python main.py --dataset nlu++ --domain banking --task in_context \
# --model api --size large --openai-key <api_key> --source-max-len 64 \
# --quantify --log-interval 300 --verbose --pool-size 5 --num-shot 5 --ignore-cache
# chain of thought
# python main.py --dataset nlu++ --domain hotels --task in_context \
# --model api --size giant --openai-key <api_key> --source-max-len 64 \
# --quantify --log-interval 300 --verbose --pool-size 5 --num-shot 5 --icl-type cot --ignore-cache
# python main.py --dataset crossner --domain politics --task in_context \
# --model api --size giant --openai-key <api_key> --source-max-len 64 \
# --quantify --log-interval 300 --verbose --pool-size 5 --num-shot 5 --icl-type cot --ignore-cache
# python main.py --dataset topv2 --domain weather --task in_context \
# --model api --size giant --openai-key <api_key> \
# --quantify --log-interval 300 --verbose --metric accuracy --pool-size 5 --num-shot 5 --icl-type cot --ignore-cache
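# >> Sketch: keeping the OpenAI key out of the command line by reading it from an environment
# variable (OPENAI_KEY is an assumed variable name; --openai-key is the same flag used above).
# export OPENAI_KEY="sk-..."   # better: source it from a local, untracked file
# python main.py --dataset nlu++ --domain banking --task in_context \
#   --model api --size large --openai-key "$OPENAI_KEY" --source-max-len 64 \
#   --quantify --log-interval 300 --verbose --pool-size 5 --num-shot 5 --ignore-cache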
# ______________________________________________________
# ________ Synthesize task for DataAug, CTG, and OpenAI ________
# >> Pre-train a Generator for Synthesizing Data
# python main.py --dataset nlu++ --task synthesize --do-train --quantify --qualify --do-save \
# --size medium --batch-size 4 --n-epochs 12 --learning-rate 4e-5 --patience 3 --prune-keep 1 \
# --source-max-len 64 --target-max-len 64 --ignore-cache --verbose --metric bleu --method dexpert
# python main.py --dataset topv2 --setting few_shot --task synthesize --do-train --quantify --do-save \
# --size medium --batch-size 4 --n-epochs 12 --learning-rate 4e-5 --patience 5 --prune-keep 1 \
# --source-max-len 64 --target-max-len 128 --ignore-cache --metric bleu --method dexpert
# python main.py --dataset crossner --setting full --task synthesize --do-train --quantify --do-save \
# --size medium --batch-size 4 --n-epochs 12 --learning-rate 4e-5 --patience 5 --prune-keep 2 \
# --source-max-len 256 --target-max-len 256 --ignore-cache --metric bleu --method dexpert
# >> Data Augmentation (no training)
# python main.py --dataset nlu++ --task synthesize --do-save --model aug --method para \
# --temperature 1.4 --debug
# python main.py --dataset crossner --task synthesize --model aug --method eda --threshold 0.2
# python main.py --dataset topv2 --task synthesize --model aug --method rtt --do-save
# python main.py --dataset nlu++ --task synthesize --model aug --method fill --do-save --verbose
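# >> Sketch: running the four augmentation methods above back to back on one dataset.
# Method-specific extras (e.g. --temperature for para, --threshold for eda) are dropped here,
# which assumes main.py falls back to reasonable defaults for them.
# for method in para eda rtt fill; do
#   python main.py --dataset nlu++ --task synthesize --model aug --method "$method" --do-save
# done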
# >> Controlled Text Generation (no training)
# python main.py --dataset nlu++ --domain hotels --setting cross --task synthesize --target-max-len 48 \
# --size medium --model bert --method cvae --verbose --n-tokens 100 --do-save
# python main.py --dataset topv2 --domain weather --task synthesize --target-max-len 48 \
# --size medium --method prefix --verbose --threshold 1.4 --temperature 0.7 --do-save
# python main.py --num-shot 50 --dataset crossner --domain music --task synthesize --target-max-len 48 \
# --size medium --method clm --verbose --threshold 1.4 --temperature 0.7 --do-save
# python main.py --dataset nlu++ --task synthesize --n-tokens 100 --batch-size 8 --do-save \
# --model gpt --size large --source-max-len 64 --quantify --log-interval 100 --verbose \
# --method msp --mixture concat --num-shot 1 --metric bleu --domain banking --threshold 1.2
# >> OpenAI GPT
# python main.py --dataset crossner --domain music --task synthesize --verbose \
# --model api --size giant --openai-key <api_key> \
# --num-shot 2 --method none --setting full --do-save --ignore-cache --num-generations 6
# python main.py --dataset nlu++ --domain hotels --task synthesize --verbose \
# --model api --size giant --openai-key <api_key> \
# --num-shot 2 --method none --setting cross --do-save --ignore-cache --num-generations 6
# python main.py --dataset topv2 --domain weather --task synthesize --verbose \
# --model api --size giant --openai-key <api_key> \
# --num-shot 2 --method none --setting few_shot --do-save --ignore-cache --num-generations 6
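# >> Sketch: the three OpenAI synthesis calls above differ only in dataset, domain, and setting,
# so a small loop covers them; the (dataset, domain, setting) triples are copied from those examples.
# for cfg in "crossner music full" "nlu++ hotels cross" "topv2 weather few_shot"; do
#   set -- $cfg
#   python main.py --dataset "$1" --domain "$2" --setting "$3" --task synthesize --verbose \
#     --model api --size giant --openai-key <api_key> \
#     --num-shot 2 --method none --do-save --ignore-cache --num-generations 6
# done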
# __________________________________________________________
# ________ Experiments with Mixture of Soft Prompts ________
# >> Changing the composition attributes
# python3 main.py --dataset nlu++ --task synthesize --do-train --do-save --quantify --method msp \
# --n-epochs 14 --model godel --size giant --source-max-len 64 --target-max-len 64 --metric bleu \
# --learning-rate 0.1 --mixture concat --batch-size 6 --grad-accum-steps 4 --log-interval 100
# python3 main.py --dataset crossner --task synthesize --do-train --quantify --verbose --do-save \
# --n-epochs 10 --model godel --size large --source-max-len 128 --target-max-len 64 --metric bleu \
# --learning-rate 0.3 --method msp --mixture concat --batch-size 6 --grad-accum-steps 4 \
# --filter --log-interval 400 --setting full --qualify --prune-keep 6
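# >> Sketch: sweeping the mixture mechanisms that appear in this file (concat, bottleneck, pooling)
# with the CrossNER command above; any other --mixture values main.py may support are not assumed here.
# for mix in concat bottleneck pooling; do
#   python3 main.py --dataset crossner --task synthesize --do-train --quantify --verbose --do-save \
#     --n-epochs 10 --model godel --size large --source-max-len 128 --target-max-len 64 --metric bleu \
#     --learning-rate 0.3 --method msp --mixture "$mix" --batch-size 6 --grad-accum-steps 4 \
#     --filter --log-interval 400 --setting full --qualify --prune-keep 6
# done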
# >> Generate data using a trained MSP model
# python3 main.py --dataset crossner --task synthesize --n-tokens 100 --source-max-len 64 --setting full \
# --model godel --size large --quantify --method msp --mixture concat --num-shot 2 --domain music \
# --filter --do-save --temperature 2.0 --threshold 2.0 --checkpoint acc118_lr0.3_epoch2.pt
# python3 main.py --dataset topv2 --task synthesize --n-tokens 100 --source-max-len 64 --ignore-cache \
# --model godel --size giant --quantify --method msp --mixture bottleneck --num-generations 10 \
# --filter --do-save --temperature 1.6 --threshold 2.0 --domain reminder --checkpoint acc380_lr0.3_epoch3.pt
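# Note: checkpoint names such as acc118_lr0.3_epoch2.pt are produced by your own training runs
# (saved with --do-save); substitute whichever checkpoint file your run actually wrote.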
# >> Ablations
# python3 main.py --dataset crossner --task synthesize --do-train --debug --learning-rate 0.03 \
# --model gpt --size giant --quantify --qualify --metric bleu --setting full \
# --n-epochs 10 --method msp --mixture concat --batch-size 4 --grad-accum-steps 6
# python3 main.py --dataset topv2 --task synthesize --do-train --do-save --n-tokens 100 \
# --model gpt --size giant --source-max-len 64 --quantify --qualify --metric bleu \
# --n-epochs 14 --learning-rate 0.01 --batch-size 6 --prune-keep 6 --patience 5 \
# --method msp --mixture concat --verbose --grad-accum-steps 4 --num-shot 1 --threshold 1.2
# python main.py --dataset nlu++ --task synthesize --do-train --debug --n-tokens 50 \
# --model gpt --size small --source-max-len 128 --quantify --log-interval 200 \
# --n-epochs 7 --learning-rate 1e-5 --method msp --mixture concat --filter
# _____________________________________________
# ______________ Special Modes ________________
# >> Qualitative Testing
# python interact.py --task synthesize --model gpt --size small --threshold 1.2 --temperature 1.0 \
# --dataset nlu++ --domain hotels --setting cross --num-shot 3 --target-max-len 32 --do-guide
# python interact.py --task synthesize --model gpt --size small --threshold 1.2 --temperature 1.0 \
# --dataset crossner --domain music --setting full --num-shot 3 --target-max-len 32
# python interact.py --task synthesize --model gpt --size small --threshold 1.2 --temperature 1.0 \
# --dataset topv2 --domain reminder --num-shot 3 --target-max-len 32 --do-guide --verbose
# >> Train a classifier for automated text evaluation
# python automatic.py --task classify --model bert --size small --do-train --debug --quantify \
# --learning-rate 3e-5 --dataset topv2 --domain reminder --setting full
# >> Automated Text Evaluation of a given generated data file
# python3 automatic.py --dataset nlu++ --task classify --do-eval --quantify \
# --generated-data-file ./results/msp_example.json
# >> Train oracle classifier for synthesis
# python automatic.py --dataset nlu++ --task classify --do-train --do-save \
# --model bert --size large --qualify --ignore-cache --n-epochs 15 \
# --verbose --learning-rate 5e-5 --batch-size 8 --setting full --prune-keep 1
# python automatic.py --dataset topv2 --task classify --do-train --do-save \
# --model bert --size large --qualify --ignore-cache --n-epochs 15 \
# --verbose --learning-rate 5e-5 --batch-size 8 --setting full --prune-keep 1
# >> Automated Text Evaluation of a given generated data file with a trained discriminator
# python3 automatic.py --dataset topv2 --domain weather --task classify --do-eval --quantify \
# --generated-data-file assets/cache/topv2/msp_meanpool.json --size medium \
# --checkpoint assets/topv2_weather_intent_classifier_roberta_large
# python automatic.py --dataset nlu++ --domain hotels --method clm --qualify --do-guide \
# --setting cross --task synthesize
# python automatic.py --dataset topv2 --task classify --do-eval --verbose \
# --do-save --model bert --size large --ignore-cache --qualify \
# --batch-size 8 --setting full --generated-data-path assets/cache/topv2/clm_weather_few_shot.json
# >> Evaluation from saved checkpoint
# python main.py --dataset nlu++ --task soft_prompt --do-eval --n-tokens 100 \
# --model gpt --size small --source-max-len 64 --quantify \
# --batch-size 8 --checkpoint acc070_lr0.003_epoch5.pt
# python3 main.py --dataset crossner --task fine_tune --do-eval --model godel --size medium \
# --domain literature --quantify --batch-size 10 --checkpoint acc650_lr0.0001_epoch7.pt
# >> Smoke test to make sure the code is operational; should reach ~80% F1-score
python main.py --dataset actdial --task fine_tune --do-train --debug --source-max-len 64 \
--model t5 --size small --quantify --log-interval 140 --learning-rate 3e-4
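# To keep a record of the smoke test, pipe it through tee (plain bash, no repo-specific flags assumed):
# python main.py --dataset actdial --task fine_tune --do-train --debug --source-max-len 64 \
#   --model t5 --size small --quantify --log-interval 140 --learning-rate 3e-4 2>&1 | tee smoke_test.log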