main.py
import numpy as np
try:
    import cupy as cp  # optional; only required when DEVICE >= 0
except ImportError:
    cp = None
import random
import chainer
import chainer.functions as F
import chainer.links as L
from chainer import training
from chainer.training import extensions
from model import *
from updater import *
from iterator import *
################### Configuration #####################
LARGE = False
if LARGE:
    TRAIN_SIZE = 180000
    VALID_SIZE = 200
    MAX_STRING_LEN = 25
    NUM_ENCODE_LAYERS = 2
    NUM_DECODE_LAYERS = 2
    NUM_HIDDEN = 128
    BATCH_SIZE = 128
else:
    TRAIN_SIZE = 3000
    VALID_SIZE = 200
    MAX_STRING_LEN = 15
    NUM_ENCODE_LAYERS = 1
    NUM_DECODE_LAYERS = 1
    NUM_HIDDEN = 64
    BATCH_SIZE = 64
NUM_EPOCHS = 20
ATTEND = True
DROPOUT = 0.5
DEVICE = -1
GRADIENT_CLIP = 5
LOG = "./log"
word_id_map = {"a": 0, "b": 1, "c": 2, "d": 3, "<eos>": 4, "<pad>": 5}
VOCAB_SIZE = len(word_id_map)
np.random.seed(234)
random.seed(543)
####################################################
################## Sample data #####################
def sample_data(min_length, max_length):
    # Draw a random sequence over the plain tokens (everything except
    # <eos> and <pad>); the target is simply the input reversed.
    x = np.random.randint(0, VOCAB_SIZE - 2,
                          size=(random.randint(min_length, max_length),)).tolist()
    y = x[::-1]
    return x, y

train_set = [sample_data(1, MAX_STRING_LEN) for _ in range(TRAIN_SIZE)]
val_set = [sample_data(1, MAX_STRING_LEN) for _ in range(VALID_SIZE)]
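# For illustration, a hypothetical draw from sample_data(3, 5) could be
#     x = [0, 2, 1, 3]   # "a c b d"
#     y = [3, 1, 2, 0]   # "d b c a"
# i.e. the model has to learn to emit the input sequence in reverse order.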
##################### MAIN ########################
def compute_loss(result, size):
    # Alias the reporter's keys to the shorter names printed below.
    result["train_loss"] = result["main/loss"]
    result["train_acc"] = result["main/accuracy"]
    if "validation/main/loss" in result:
        result["val_loss"] = result["validation/main/loss"]
    if "validation/main/accuracy" in result:
        result["val_acc"] = result["validation/main/accuracy"]
if ATTEND:
    attention_model = GlobalAttention(NUM_HIDDEN)
    model = AttentionalEncoderDecoder(attention_model, NUM_ENCODE_LAYERS,
                                      NUM_DECODE_LAYERS, NUM_HIDDEN,
                                      VOCAB_SIZE, DROPOUT)
else:
    model = EncoderDecoder(NUM_ENCODE_LAYERS, NUM_DECODE_LAYERS, NUM_HIDDEN,
                           VOCAB_SIZE, DROPOUT)
model = Seq2SeqClassifier(model)
# Set up for using the GPU. Very easy!  xp aliases the active array module
# (NumPy on CPU, CuPy on GPU) so downstream code stays device-agnostic.
xp = np
if DEVICE >= 0:
    chainer.cuda.get_device(DEVICE).use()
    model.to_gpu()
    xp = cp
# Optimizer.
optimizer = chainer.optimizers.Adam()
optimizer.setup(model)
optimizer.add_hook(chainer.optimizer.GradientClipping(GRADIENT_CLIP))
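# GradientClipping is a Chainer optimizer hook that rescales the gradients
# whenever their overall L2 norm exceeds GRADIENT_CLIP; this guards the
# recurrent layers against exploding gradients.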
# Data iterators.
train_iter = Seq2SeqIterator(xp, train_set, BATCH_SIZE, word_id_map["<eos>"],
                             word_id_map["<pad>"], shuffle=False, repeat=True)
val_iter = Seq2SeqIterator(xp, val_set, BATCH_SIZE, word_id_map["<eos>"],
                           word_id_map["<pad>"], shuffle=False, repeat=False)
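# (Seq2SeqIterator comes from iterator.py; judging by its arguments, it
# batches the variable-length pairs, appending <eos> and padding with <pad>
# so that every sequence in a minibatch has the same length.)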
# Updater (training loop).
updater = Seq2SeqUpdater(train_iter, optimizer, DEVICE)
# Trainer.
trainer = training.Trainer(updater, stop_trigger=(NUM_EPOCHS, "epoch"), out=LOG)
# Attach monitoring and evaluating extensions.
trainer.extend(extensions.Evaluator(
    val_iter, model, device=DEVICE,
    eval_func=lambda batch: model(batch[0], batch[1], train=False)))
interval = 1
trainer.extend(extensions.LogReport(
    postprocess=lambda result: compute_loss(result, TRAIN_SIZE),
    trigger=(interval, "epoch")))
trainer.extend(extensions.PrintReport(
    ["epoch", "iteration", "train_loss", "train_acc", "val_loss", "val_acc"]),
    trigger=(interval, "epoch"))
trainer.extend(extensions.ProgressBar(update_interval=1))
trainer.extend(extensions.snapshot(trigger=(interval, "epoch")))
trainer.extend(
    extensions.snapshot_object(model, "model_epoch_{.updater.epoch}"))
trainer.run()
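# After training, any of the per-epoch snapshots written by snapshot_object
# above can be restored for later inference with Chainer's serializers.
# A minimal sketch (the filename is an assumption, derived from the
# "model_epoch_{.updater.epoch}" template; adjust the epoch as needed):
#
#     chainer.serializers.load_npz(LOG + "/model_epoch_%d" % NUM_EPOCHS, model)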