train.py
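
"""Training entry point for Hypformer.

Parses CLI arguments, builds the model around a pretrained HuggingFace
encoder, optionally resumes from a safetensors checkpoint, and delegates
the optimization loop (with optional wandb logging) to Trainer.
"""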
import argparse
import os
import torch
from safetensors.torch import load_file
from transformers import AutoConfig
from model import Hypformer
from dataset import Dataset
from trainer import Trainer


def create_parser() -> argparse.ArgumentParser:
    parser = argparse.ArgumentParser()
    # Logging
    parser.add_argument("--no_wandb", action="store_true")
    parser.add_argument("--project", type=str, default="hyp")
    parser.add_argument("--name", type=str, default=None)
    # Model dimensions (None values are derived in __main__ below)
    parser.add_argument("--encoder_name", type=str, default="tunib/electra-ko-en-base")
    parser.add_argument("--d_eh", type=int, default=None)
    parser.add_argument("--d_model", type=int, default=64)
    parser.add_argument("--d_k", type=int, default=8)
    parser.add_argument("--d_v", type=int, default=None)
    parser.add_argument("--n_head", type=int, default=8)
    parser.add_argument("--d_ff", type=int, default=None)
    parser.add_argument("--n_layer", type=int, default=6)
    # Data and training hyperparameters
    parser.add_argument("--dataset", type=str, default="PRIV_01")
    parser.add_argument("--mask_label", action="store_true")
    parser.add_argument("--use_weight", action="store_true")
    parser.add_argument("--lr", type=float, default=5e-05)
    parser.add_argument("--batch_size", type=int, default=128)
    parser.add_argument("--n_bb", type=int, default=1)
    parser.add_argument("--n_print", type=int, default=50)
    parser.add_argument("--n_val", type=int, default=1000)
    parser.add_argument("--n_save", type=int, default=5000)
    parser.add_argument("--n_iter", type=int, default=5000)
    # Checkpointing / debugging
    parser.add_argument("--ckpt", type=int, default=None)
    parser.add_argument("--ckpt_path", type=str, default="./checkpoints")
    parser.add_argument("--detect_anomaly", action="store_true")
    return parser


if __name__ == "__main__":
    parser = create_parser()
    args = parser.parse_args()

    dataset = Dataset(args.dataset)
    n_label, max_depth = dataset.n_label, dataset.max_depth

    # Derive any unspecified dimensions from the encoder config and d_model/d_k.
    d_encoder = AutoConfig.from_pretrained(args.encoder_name).hidden_size
    if args.d_eh is None:
        args.d_eh = d_encoder * 4
    if args.d_v is None:
        args.d_v = args.d_k
    if args.d_ff is None:
        args.d_ff = args.d_model * 4
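    # With the defaults above (and assuming the encoder's hidden_size is 768):
    #   d_eh = 768 * 4 = 3072, d_v = d_k = 8, d_ff = 64 * 4 = 256.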
    model = Hypformer(
        d_encoder,
        args.d_eh,
        args.d_model,
        args.d_k,
        args.d_v,
        args.n_head,
        args.d_ff,
        args.n_layer,
        n_label,
        max_depth
    )

    if not os.path.exists(args.ckpt_path):
        os.mkdir(args.ckpt_path)
    n_ckpt = args.ckpt if args.ckpt is not None else 0
    def save_path(x: int) -> str:
        # "/" in the encoder name would create nested directories, so swap it for "@".
        alter_name = "@".join(args.encoder_name.split("/"))
        return (
            f"{args.ckpt_path}/{args.dataset}-{alter_name}-{args.d_eh}-{args.d_model}"
            f"-{args.d_k}-{args.d_v}-{args.n_head}-{args.d_ff}-{args.n_layer}-ckpt-{x + n_ckpt}"
        )
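    # The resulting directory looks like (illustrative, using the defaults above
    # and assuming the encoder's hidden_size is 768):
    #   ./checkpoints/PRIV_01-tunib@electra-ko-en-base-3072-64-8-8-8-256-6-ckpt-0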

    # Resume from an existing checkpoint (save_path(0) points at iteration n_ckpt).
    if args.ckpt is not None:
        ckpt = load_file(save_path(0) + "/model.safetensors")
        model.load_state_dict(ckpt)
    trainer = Trainer(
        model=model,
        encoder_name=args.encoder_name,
        dataset=dataset
    )

    if args.detect_anomaly:
        # detect_anomaly() on its own only builds an unused context manager;
        # the global switch is set_detect_anomaly(True).
        torch.autograd.set_detect_anomaly(True)
    if args.name is None:
        mask_label = "-mask" if args.mask_label else ""
        use_weight = "-weight" if args.use_weight else ""
        args.name = f"{args.dataset}-{args.encoder_name}-{args.d_eh}-{args.d_model}-{args.d_k}-{args.d_v}-{args.n_head}-{args.d_ff}-{args.n_layer}"
        args.name += f"{mask_label}{use_weight}"

    # wandb run configuration; None disables logging entirely.
    config_wandb = {
        "project": args.project,
        "name": args.name,
        "config": {
            "dataset": args.dataset,
            "encoder": args.encoder_name,
            "d_eh": args.d_eh,
            "d_model": args.d_model,
            "d_k": args.d_k,
            "d_v": args.d_v,
            "n_head": args.n_head,
            "d_ff": args.d_ff,
            "n_layer": args.n_layer,
            "lr": args.lr,
            "batch_size": args.batch_size * args.n_bb  # effective batch size
        }
    } if not args.no_wandb else None
    trainer.train(
        lr=args.lr,
        batch_size=args.batch_size,
        n_bb=args.n_bb,
        n_print=args.n_print,
        n_val=args.n_val,
        n_save=args.n_save,
        n_iter=args.n_iter,
        save_path=save_path,
        mask_label=args.mask_label,
        use_weight=args.use_weight,
        config_wandb=config_wandb
    )
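
# Example invocations (illustrative only; flag values shown are the parser
# defaults, including the dataset name "PRIV_01"):
#
#   python train.py --dataset PRIV_01 --encoder_name tunib/electra-ko-en-base \
#       --batch_size 128 --n_iter 5000 --no_wandb
#
# Resuming from the checkpoint saved at iteration 5000 would then be:
#
#   python train.py --ckpt 5000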