# inception_module_model.py
import functools
import tensorflow as tf
from data import image_size, num_channels, num_labels
from model import Model
from neural_network import get_filter_output_size, depth_concat, get_l2_loss
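# Builds a small GoogLeNet-style network out of one or more inception modules
# using the pre-1.0 TensorFlow graph API (see http://arxiv.org/pdf/1409.4842v1.pdf).
# Set print_messages = True to trace tensor shapes while the graph is assembled.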
print_messages = False
class Module():
def __init__(self, one_by_one_conv_weights_to_three_by_three, one_by_one_conv_biases_to_three_by_three, one_by_one_conv_weights_to_five_by_five, one_by_one_conv_biases_to_five_by_five,
one_by_one_conv_weights_to_depthconcat, one_by_one_conv_biases_to_depthconcat, one_by_one_conv_weights_from_max_pool, one_by_one_conv_biases_from_max_pool, three_by_three_conv_weights,
three_by_three_conv_biases, five_by_five_conv_weights, five_by_five_conv_biases):
self.one_by_one_conv_weights_to_three_by_three = one_by_one_conv_weights_to_three_by_three
self.one_by_one_conv_biases_to_three_by_three = one_by_one_conv_biases_to_three_by_three
self.one_by_one_conv_weights_to_five_by_five = one_by_one_conv_weights_to_five_by_five
self.one_by_one_conv_biases_to_five_by_five = one_by_one_conv_biases_to_five_by_five
self.one_by_one_conv_weights_to_depthconcat = one_by_one_conv_weights_to_depthconcat
self.one_by_one_conv_biases_to_depthconcat = one_by_one_conv_biases_to_depthconcat
self.one_by_one_conv_weights_from_max_pool = one_by_one_conv_weights_from_max_pool
self.one_by_one_conv_biases_from_max_pool = one_by_one_conv_biases_from_max_pool
self.three_by_three_conv_weights = three_by_three_conv_weights
self.three_by_three_conv_biases = three_by_three_conv_biases
self.five_by_five_conv_weights = five_by_five_conv_weights
self.five_by_five_conv_biases = five_by_five_conv_biases
def get_weights(self):
return [self.one_by_one_conv_weights_to_three_by_three, self.one_by_one_conv_weights_to_five_by_five, self.one_by_one_conv_weights_to_depthconcat,
self.one_by_one_conv_weights_from_max_pool, self.three_by_three_conv_weights, self.five_by_five_conv_weights]
def create_inception_module(in_channels, feature_maps, input_spatial_size, initialised_weights_stddev):
"""
Inception module:
input layer
1x1+1 conv, 1x1+1 conv, 3x3+1 maxpool
1x1+1 conv, 3x3+1 conv, 5x5+1 conv, 1x1+1 conv
DepthConcat
"""
_1x1_fm, _1x1_3x3_fm, _3x3_fm, _1x1_5x5_fm, _5x5_fm, mp_1x1_fm = feature_maps
patch_size = 1
one_by_one_conv_weights_to_three_by_three = tf.Variable(tf.truncated_normal(
[patch_size, patch_size, in_channels, _1x1_3x3_fm], stddev=initialised_weights_stddev), name='1x1w_to_3x3')
one_by_one_conv_biases_to_three_by_three = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[_1x1_3x3_fm]), name='1x1b_to_3x3')
one_by_one_conv_weights_to_five_by_five = tf.Variable(tf.truncated_normal(
[patch_size, patch_size, in_channels, _1x1_5x5_fm], stddev=initialised_weights_stddev), name='1x1w_to_5x5')
one_by_one_conv_biases_to_five_by_five = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[_1x1_5x5_fm]), name='1x1b_to_5x5')
one_by_one_conv_weights_to_depthconcat = tf.Variable(tf.truncated_normal(
[patch_size, patch_size, in_channels, _1x1_fm], stddev=initialised_weights_stddev), name='1x1w_to_depth_concat')
one_by_one_conv_biases_to_depthconcat = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[_1x1_fm]), name='1x1b_to_depth_concat')
    max_pool_size = 3
    max_pool_stride = 1
    three_by_three_max_pool_output_size = get_filter_output_size(input_spatial_size, max_pool_size, max_pool_stride)
    if print_messages: print("three_by_three_max_pool_output_size: %s" % three_by_three_max_pool_output_size)
    # The projection after the 3x3 maxpool is a 1x1 convolution, so its filter
    # is 1x1 regardless of the maxpool's spatial output size.
    one_by_one_conv_weights_from_max_pool = tf.Variable(tf.truncated_normal(
        [1, 1, in_channels, mp_1x1_fm], stddev=initialised_weights_stddev), name='1x1w_from_maxpool')
    one_by_one_conv_biases_from_max_pool = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[mp_1x1_fm]), name='1x1b_from_maxpool')
patch_size = 3
three_by_three_conv_weights = tf.Variable(tf.truncated_normal(
[patch_size, patch_size, _1x1_3x3_fm, _3x3_fm], stddev=initialised_weights_stddev), name='3x3w')
three_by_three_conv_biases = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[_3x3_fm]), name='3x3b')
patch_size = 5
five_by_five_conv_weights = tf.Variable(tf.truncated_normal(
[patch_size, patch_size, _1x1_5x5_fm, _5x5_fm], stddev=initialised_weights_stddev), name='5x5w')
five_by_five_conv_biases = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[_5x5_fm]), name='5x5b')
    # The 3x3 maxpool and DepthConcat layers don't need any variables.
return Module(one_by_one_conv_weights_to_three_by_three, one_by_one_conv_biases_to_three_by_three, one_by_one_conv_weights_to_five_by_five, one_by_one_conv_biases_to_five_by_five,
one_by_one_conv_weights_to_depthconcat, one_by_one_conv_biases_to_depthconcat, one_by_one_conv_weights_from_max_pool, one_by_one_conv_biases_from_max_pool, three_by_three_conv_weights,
three_by_three_conv_biases, five_by_five_conv_weights, five_by_five_conv_biases)
def create_inception_module_graph(module, input_tensor):
# Adjacent layer 1
if print_messages: print("input_tensor shape:", input_tensor.get_shape().as_list())
if print_messages: print("one_by_one_conv_weights_to_three_by_three shape:", module.one_by_one_conv_weights_to_three_by_three.get_shape().as_list())
conv = tf.nn.conv2d(input_tensor, module.one_by_one_conv_weights_to_three_by_three, [1, 1, 1, 1], padding='SAME')
one_by_one_conv_weights_to_three_by_three_output = tf.nn.relu(conv + module.one_by_one_conv_biases_to_three_by_three)
if print_messages: print("one_by_one_conv_weights_to_three_by_three_output shape:", one_by_one_conv_weights_to_three_by_three_output.get_shape().as_list())
conv = tf.nn.conv2d(input_tensor, module.one_by_one_conv_weights_to_five_by_five, [1, 1, 1, 1], padding='SAME')
one_by_one_conv_weights_to_five_by_five_output = tf.nn.relu(conv + module.one_by_one_conv_biases_to_five_by_five)
shape = one_by_one_conv_weights_to_five_by_five_output.get_shape().as_list()
if print_messages: print("one_by_one_conv_weights_to_five_by_five_output shape: %s" % shape)
max_pool_output = tf.nn.max_pool(input_tensor, [1, 3, 3, 1], [1, 1, 1, 1], padding='SAME')
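    # Every branch uses stride-1 SAME convolutions/pooling, so all four branch
    # outputs keep the input's spatial size and can be depth-concatenated.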
shape = max_pool_output.get_shape().as_list()
if print_messages: print("max_pool_output shape: %s" % shape)
# Adjacent layer 2
conv = tf.nn.conv2d(input_tensor, module.one_by_one_conv_weights_to_depthconcat, [1, 1, 1, 1], padding='SAME')
one_by_one_output_to_depthconcat = tf.nn.relu(conv + module.one_by_one_conv_biases_to_depthconcat)
shape = one_by_one_output_to_depthconcat.get_shape().as_list()
if print_messages: print("one_by_one_output_to_depthconcat shape: %s" % shape)
conv = tf.nn.conv2d(one_by_one_conv_weights_to_three_by_three_output, module.three_by_three_conv_weights, [1, 1, 1, 1], padding='SAME')
three_by_three_output = tf.nn.relu(conv + module.three_by_three_conv_biases)
shape = three_by_three_output.get_shape().as_list()
if print_messages: print("three_by_three_output shape: %s" % shape)
conv = tf.nn.conv2d(one_by_one_conv_weights_to_five_by_five_output, module.five_by_five_conv_weights, [1, 1, 1, 1], padding='SAME')
five_by_five_output = tf.nn.relu(conv + module.five_by_five_conv_biases)
shape = five_by_five_output.get_shape().as_list()
if print_messages: print("five_by_five_output shape: %s" % shape)
if print_messages: print("one_by_one_conv_weights_from_max_pool:", module.one_by_one_conv_weights_from_max_pool.get_shape().as_list())
conv = tf.nn.conv2d(max_pool_output, module.one_by_one_conv_weights_from_max_pool, [1, 1, 1, 1], padding='SAME')
one_by_one_conv_weights_from_max_pool_output = tf.nn.relu(conv + module.one_by_one_conv_biases_from_max_pool)
shape = one_by_one_conv_weights_from_max_pool_output.get_shape().as_list()
if print_messages: print("one_by_one_conv_weights_from_max_pool_output shape: %s" % shape)
depth_concat_output = depth_concat([one_by_one_output_to_depthconcat, three_by_three_output, five_by_five_output, one_by_one_conv_weights_from_max_pool_output])
shape = depth_concat_output.get_shape().as_list()
if print_messages: print("depth_concat_output shape: %s" % shape)
return depth_concat_output
def get_depth_concat_depth(module_feature_maps):
num_of_feature_maps = 6
_1x1_fm, _1x1_3x3_fm, _3x3_fm, _1x1_5x5_fm, _5x5_fm, mp_1x1_fm = range(num_of_feature_maps)
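    # e.g. (64, 96, 128, 16, 32, 32) concatenates to 64 + 128 + 32 + 32 = 256
    # maps, the output depth of the (3a) block in the GoogLeNet paper.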
return module_feature_maps[_1x1_fm] + module_feature_maps[_3x3_fm] + module_feature_maps[_5x5_fm] + module_feature_maps[mp_1x1_fm]
def create_inception_module_model(learning_rate=0.05, initialised_weights_stddev=0.1,
                                  pre_layer_feature_maps=64,
                                  module_feature_maps=[(128, 128, 128, 128, 128, 128)],
                                  batch_size=32, eval_batch_size=1000, l2_lambda=0.1,
                                  decay_steps=10000, decay_rate=0.96,
                                  add_pre_layer_maxpool=False):
graph = tf.Graph()
with graph.as_default():
# Input data.
tf_train_dataset = tf.placeholder(tf.float32, shape=(batch_size, image_size, image_size, num_channels))
tf_train_labels = tf.placeholder(tf.float32, shape=(batch_size, num_labels))
eval_dataset = tf.placeholder(tf.float32, shape=(eval_batch_size, image_size, image_size, num_channels))
dropout_keep_probability = tf.placeholder(tf.float32)
"""
In the inception module, we have the following layers:
Pre layers:
7x7 conv + 2(S)
maxpool 3x3 + 2(s)
local response normalisation
#1x1 conv + 1(V)
#maxpool 3x3 + 1(s)
#local response normalisation
Inception modules: 1
Post layers:
AveragePool
Fully Connected Output layer
"""
# Pre layers
        pre_layer_output_feature_maps = pre_layer_feature_maps
patch_size = 7
stride = 2
        seven_by_seven_conv_pre_layer_weights = tf.Variable(tf.truncated_normal(
            [patch_size, patch_size, num_channels, pre_layer_output_feature_maps], stddev=initialised_weights_stddev), name='7x7_pre_w')
        seven_by_seven_conv_pre_layer_biases = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[pre_layer_output_feature_maps]), name='7x7_pre_b')
if print_messages: print("image_size:", image_size)
seven_by_seven_conv_pre_layer_output_size = get_filter_output_size(image_size, patch_size, stride)
if print_messages: print("seven_by_seven_conv_pre_layer_output_size:", seven_by_seven_conv_pre_layer_output_size)
        # The pre-layer maxpool is 3x3 (not 7x7), so size its output accordingly.
        maxpool_patch_size = 3
        three_by_three_maxpool_pre_layer_output_size = get_filter_output_size(
            seven_by_seven_conv_pre_layer_output_size, maxpool_patch_size, stride)
if print_messages: print("three_by_three_maxpool_pre_layer_output_size:", three_by_three_maxpool_pre_layer_output_size)
# Module layers
modules = []
        in_channels = pre_layer_output_feature_maps
in_spatial_size = three_by_three_maxpool_pre_layer_output_size if add_pre_layer_maxpool else seven_by_seven_conv_pre_layer_output_size
        # Every op inside a module uses stride-1 SAME padding, so the spatial
        # size is preserved from module to module; only the depth changes.
for feature_maps in module_feature_maps:
if print_messages: print("in_spatial_size:", in_spatial_size)
module = create_inception_module(in_channels, feature_maps, in_spatial_size, initialised_weights_stddev)
modules.append(module)
in_channels = get_depth_concat_depth(feature_maps)
            # in_spatial_size is unchanged: the module's ops all use stride-1 SAME padding.
# Now a fully connected layer
        depth_concat_depth = in_channels
        # avg_pool_output is expected to have shape (batch_size, 1, 1, depth_concat_depth).
        # WARNING: the fc_weights tensor size may be wrong.
fc_layer_one_neurons = 1000
fc_layer_one_weights = tf.Variable(tf.truncated_normal(
[depth_concat_depth, fc_layer_one_neurons], stddev=initialised_weights_stddev * 10))
fc_layer_one_biases = tf.Variable(tf.constant(initialised_weights_stddev * 10, shape=[fc_layer_one_neurons]))
output_weights = tf.Variable(tf.truncated_normal(
[fc_layer_one_neurons, num_labels], stddev=initialised_weights_stddev))
output_biases = tf.Variable(tf.constant(1.0, shape=[num_labels]))
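        # Note: this head (1000-unit ReLU layer followed by a linear output
        # layer) is a simple stand-in; the GoogLeNet paper uses a single
        # linear layer after the average pool.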
# Model.
def create_model_graph(data, add_dropout = False):
shape = data.get_shape().as_list()
if print_messages: print("data shape: %s" % shape)
# Pre layers
# 7x7_pre
stride = 2
conv = tf.nn.conv2d(data, seven_by_seven_conv_pre_layer_weights, [1, stride, stride, 1], padding='SAME')
seven_by_seven_conv_pre_layer_output = tf.nn.relu(conv + seven_by_seven_conv_pre_layer_biases)
            if print_messages: print("seven_by_seven_conv_pre_layer_output shape: %s" % seven_by_seven_conv_pre_layer_output.get_shape().as_list())
lrn_input = None
if add_pre_layer_maxpool:
# 3x3_mp
patch_size = 3
stride = 2
max_pool_output = tf.nn.max_pool(seven_by_seven_conv_pre_layer_output, [1, patch_size, patch_size, 1], [1, stride, stride, 1], padding='SAME')
lrn_input = max_pool_output
else:
lrn_input = seven_by_seven_conv_pre_layer_output
# https://www.tensorflow.org/versions/r0.10/api_docs/python/nn.html#local_response_normalization
# TODO: tune the following hyperparameters: depth_radius, bias, alpha, beta
lrn_output = tf.nn.local_response_normalization(lrn_input)
pre_layers_output = lrn_output
# Inception module(s)
input_tensor = pre_layers_output
modules_output = None
for module in modules:
# The last module will set modules_output for use below.
modules_output = create_inception_module_graph(module, input_tensor)
if print_messages: print("module output shape:", modules_output.get_shape().as_list())
input_tensor = modules_output
# The patch size of the avg_pool must match the patch_size of the depth_concat_output
# I assume that the padding must be VALID based on Google's white paper: http://arxiv.org/pdf/1409.4842v1.pdf
depth_concat_output_image_size_index = 1
largest_patch_size = modules_output.get_shape().as_list()[depth_concat_output_image_size_index]
if print_messages: print("largest_patch_size:", largest_patch_size)
avg_pool_output = tf.nn.avg_pool(modules_output, [1, largest_patch_size, largest_patch_size, 1], [1, 1, 1, 1], padding='VALID', name=None)
shape = avg_pool_output.get_shape().as_list()
if print_messages: print("avg_pool_output shape:", shape)
# Flatten the average_pool_output from 4 dimensions down to 2.
batch_index = 0
reshape_tensor = tf.reshape(avg_pool_output, (data.get_shape().as_list()[batch_index], 1 * 1 * depth_concat_depth))
if print_messages: print("reshape_tensor shape: %s" % reshape_tensor.get_shape().as_list())
# Post layers
# fully connected
output = tf.nn.relu(tf.matmul(reshape_tensor, fc_layer_one_weights) + fc_layer_one_biases)
if print_messages: print("fc_layer_one_output shape: %s" % output.get_shape().as_list())
if add_dropout:
output = tf.nn.dropout(output, dropout_keep_probability)
return tf.matmul(output, output_weights) + output_biases
# Training computation.
logits = create_model_graph(tf_train_dataset, add_dropout = True)
module_weights = functools.reduce(lambda a, b: a + b, map(lambda m: m.get_weights(), modules))
layer_weights = [ seven_by_seven_conv_pre_layer_weights ] + module_weights + [ fc_layer_one_weights, output_weights ]
        loss = tf.reduce_mean(tf.nn.softmax_cross_entropy_with_logits(logits=logits, labels=tf_train_labels) + get_l2_loss(l2_lambda, layer_weights))
# Optimizer.
global_step = tf.Variable(0, name="global_step", trainable=False)
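        # Decays as learning_rate * decay_rate ^ (global_step / decay_steps).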
decayed_learning_rate = tf.train.exponential_decay(learning_rate, global_step, decay_steps, decay_rate)
optimizer = tf.train.GradientDescentOptimizer(decayed_learning_rate).minimize(loss, global_step=global_step)
# Predictions for the training, validation, and test data.
train_prediction = tf.nn.softmax(logits)
eval_prediction = tf.nn.softmax(create_model_graph(eval_dataset))
saver = tf.train.Saver()
return Model(graph, batch_size, eval_batch_size, tf_train_dataset, tf_train_labels, eval_dataset, dropout_keep_probability, logits, loss, optimizer, train_prediction, eval_prediction, saver, global_step, layer_weights)
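

if __name__ == '__main__':
    # Minimal usage sketch: build the graph with feature-map counts matching
    # the inception (3a) block of the GoogLeNet paper. Training is assumed to
    # be driven elsewhere through the returned Model object.
    model = create_inception_module_model(
        module_feature_maps=[(64, 96, 128, 16, 32, 32)])
    print("Inception module model graph built.")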