# load necessary packages
from __future__ import print_function
import cv2
import time
import numpy as np
from scipy.misc import imsave
from keras import backend as K
from scipy.optimize import fmin_l_bfgs_b
from keras.applications import vgg16, vgg19
from keras.preprocessing.image import load_img
from utils import preprocess_image, deprocess_image
from losses import style_reconstruction_loss, feature_reconstruction_loss, total_variation_loss
import tensorflow as tf


class Neural_Styler(object):
    """
    Mix the style of an image with the content of another.

    References
    ----------
    - "A Neural Algorithm of Artistic Style", [arXiv:1508.06576]: Leon A. Gatys,
      Alexander S. Ecker and Matthias Bethge.
    """
    def __init__(self,
                 base_img_path,
                 style_img_path,
                 style_img_path2,
                 output_img_path,
                 output_width,
                 convnet,
                 content_weight,
                 style_weight,
                 tv_weight,
                 content_layer,
                 style_layers,
                 iterations):
"""
Initialize and store parameters of the neural styler. Initialize the
desired convnet and compute the 3 losses and gradients with respect to the
output image.
Params
------
- input_img: tensor containing: content_img, style_img and output_img.
- convnet: [string], defines which VGG to use: vgg16 or vgg19.
- style_layers: list containing name of layers to use for style
reconstruction. Defined in Gatys et. al but can be changed.
- content_layer: string containing name of layer to use for content
reconstruction. Also defined in Gatys et. al.
- content_weight: weight for the content loss.
- style_weight: weight for the style loss.
- tv_weight: weight for the total variation loss.
- iterations: iterations for optimization algorithm
- output_img_path: path to output image.
Notes
-----
[1] If user specifies output width, then calculate the corresponding
image height. Else, output image height and width should be the
same as that of the content image. Also note that style image
should be resized to whatever was decided above.
[2] PIL returns (width, height) whereas numpy returns (height, width)
since nrows=height and ncols=width.
[3] L_BFGS requires that loss and grad be two functions so we create
a keras function that computes the gradients and loss and return
each separately using two different class methods.
"""
        print('\nInitializing Neural Style model...')

        # store paths
        self.base_img_path = base_img_path
        self.style_img_path = style_img_path
        self.style_img_path2 = style_img_path2
        self.output_img_path = output_img_path

        # configure image sizes [1, 2]
        print('\n\tResizing images...')
        self.width = output_width
        width, height = load_img(self.base_img_path).size
        new_dims = (height, width)

        # store shapes for future use
        self.img_nrows = height
        self.img_ncols = width

        if self.width is not None:
            # calculate the new height that preserves the aspect ratio
            num_rows = int(np.floor(float(height) * self.width / width))
            new_dims = (num_rows, self.width)

            # update the stored shapes
            self.img_nrows = num_rows
            self.img_ncols = self.width
        # load the content image and the two style images
        # NOTE: the output resolution is hardcoded to 400x400 here, which
        # overrides the dimensions computed above, so keep the stored
        # shapes in sync with it
        output_width = 400
        self.img_nrows = output_width
        self.img_ncols = output_width

        self.content_img = cv2.imread(self.base_img_path)
        self.style_img = cv2.imread(self.style_img_path)
        self.style_img2 = cv2.imread(self.style_img_path2)

        # resize all three images to the output resolution
        self.content_img = cv2.resize(self.content_img, (output_width, output_width))
        self.style_img = cv2.resize(self.style_img, (output_width, output_width))
        self.style_img2 = cv2.resize(self.style_img2, (output_width, output_width))

        # add a batch dimension and cast to float32 so the arrays can be
        # concatenated with the float32 output placeholder below
        # (NOTE: no VGG mean subtraction is applied on this code path)
        self.content_img = np.expand_dims(self.content_img, axis=0).astype(K.floatx())
        self.style_img = np.expand_dims(self.style_img, axis=0).astype(K.floatx())
        self.style_img2 = np.expand_dims(self.style_img2, axis=0).astype(K.floatx())
        # and also create the output placeholder with the desired shape
        if K.image_dim_ordering() == 'th':
            self.output_img = K.placeholder((1, 3, output_width, output_width))
        else:
            self.output_img = K.placeholder((1, output_width, output_width, 3))

        # sanity check on dimensions
        # print("\tSize of content image is: {}".format(K.int_shape(self.content_img)))
        # print("\tSize of style image is: {}".format(K.int_shape(self.style_img)))
        # print("\tSize of output image is: {}".format(K.int_shape(self.output_img)))

        # combine the 4 images into a single Keras tensor
        self.input_img = K.concatenate([self.content_img,
                                        self.style_img,
                                        self.style_img2,
                                        self.output_img], axis=0)
        self.convnet = convnet
        self.iterations = iterations

        # store weights of the loss components
        self.content_weight = content_weight
        self.style_weight = style_weight
        self.tv_weight = tv_weight

        # store convnet layers
        self.content_layer = content_layer
        self.style_layers = style_layers
        # initialize the chosen VGG model
        print('\tLoading {} model'.format(self.convnet.upper()))
        if self.convnet == 'vgg16':
            self.model = vgg16.VGG16(input_tensor=self.input_img,
                                     weights='imagenet',
                                     include_top=False)
        else:
            # VGG19 weights are loaded from a local file here; pass
            # weights='imagenet' to download them instead
            self.model = vgg19.VGG19(input_tensor=self.input_img,
                                     weights='./vgg19_weights_tf_dim_ordering_tf_kernels_notop.h5',
                                     include_top=False)
        print('\tComputing losses...')
        # get the symbolic outputs of each "key" layer (they have unique names)
        outputs_dict = dict([(layer.name, layer.output) for layer in self.model.layers])

        # extract features only from the content layer
        content_features = outputs_dict[self.content_layer]

        # extract the activations of the base image and the output image
        base_image_features = content_features[0, :, :, :]   # 0 corresponds to base
        combination_features = content_features[3, :, :, :]  # 3 corresponds to output

        # content loss: feature reconstruction loss between base and output
        content_loss = self.content_weight * \
            feature_reconstruction_loss(base_image_features,
                                        combination_features)
        # for each style layer, compute the style loss;
        # the total style loss is the weighted sum of those losses
        temp_style_loss = K.variable(0.0)
        weight = 1.0 / float(len(self.style_layers))

        for layer in self.style_layers:
            # extract features of the given layer
            style_features = outputs_dict[layer]

            # from those features, extract style and output activations
            style_image_features1 = style_features[1, :, :, :]  # 1 corresponds to style 1
            style_image_features2 = style_features[2, :, :, :]  # 2 corresponds to style 2
            output_style_features = style_features[3, :, :, :]  # 3 corresponds to output

            temp_style_loss += weight * \
                style_reconstruction_loss(style_image_features1,
                                          style_image_features2,
                                          output_style_features,
                                          self.img_nrows,
                                          self.img_ncols)
        style_loss = self.style_weight * temp_style_loss
        # compute the total variation loss
        tv_loss = self.tv_weight * total_variation_loss(self.output_img,
                                                        self.img_nrows,
                                                        self.img_ncols)

        # composite loss: weighted sum of the content, style and TV terms
        total_loss = content_loss + style_loss + tv_loss

        # compute gradients of the loss with respect to the output image
        print('\tComputing gradients...')
        grads = K.gradients(total_loss, self.output_img)

        outputs = [total_loss]
        if type(grads) in {list, tuple}:
            outputs += grads
        else:
            outputs.append(grads)

        # [3]
        self.loss_and_grads = K.function([self.output_img], outputs)
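        # NOTE: loss() and grads() below each invoke self.loss_and_grads
        # separately, so every L-BFGS step evaluates the graph twice; caching
        # both outputs of a single call would halve the per-step cost.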

    def style(self):
        """
        Run L-BFGS over the pixels of the generated image so as to
        minimize the neural style loss.
        """
        print('\nDone initializing... Ready to style!')

        # initialize the generated image with random noise
        if K.image_dim_ordering() == 'th':
            x = np.random.uniform(0, 255, (1, 3, self.img_nrows, self.img_ncols)) - 128.
        else:
            x = np.random.uniform(0, 255, (1, self.img_nrows, self.img_ncols, 3)) - 128.

        # run the optimization
        for i in range(self.iterations):
            print('\n\tIteration: {}'.format(i + 1))
            tic = time.time()

            x, min_val, info = fmin_l_bfgs_b(self.loss, x.flatten(),
                                             fprime=self.grads, maxfun=20)

            # save the current generated image
            img = deprocess_image(x.copy(), self.img_nrows, self.img_ncols)
            fname = self.output_img_path + '_at_iteration_%d.png' % (i + 1)
            imsave(fname, img)

            toc = time.time()
            print('\t\tImage saved as', fname)
            print('\t\tLoss: {:.2e}, Time: {:.2f} seconds'.format(float(min_val), toc - tic))

    def loss(self, x):
        # reshape the flat vector from L-BFGS back into an image tensor
        if K.image_dim_ordering() == 'th':
            x = x.reshape((1, 3, self.img_nrows, self.img_ncols))
        else:
            x = x.reshape((1, self.img_nrows, self.img_ncols, 3))
        outs = self.loss_and_grads([x])
        loss_value = outs[0]
        return loss_value

    def grads(self, x):
        # reshape the flat vector from L-BFGS back into an image tensor
        if K.image_dim_ordering() == 'th':
            x = x.reshape((1, 3, self.img_nrows, self.img_ncols))
        else:
            x = x.reshape((1, self.img_nrows, self.img_ncols, 3))
        outs = self.loss_and_grads([x])
        if len(outs[1:]) == 1:
            grad_values = outs[1].flatten().astype('float64')
        else:
            grad_values = np.array(outs[1:]).flatten().astype('float64')
        return grad_values
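

# A minimal usage sketch. The file paths are placeholders, and the layer
# names and loss weights below are just the common Gatys et al. choices for
# Keras's VGG16, not values mandated by this module:
if __name__ == '__main__':
    styler = Neural_Styler(base_img_path='content.jpg',
                           style_img_path='style1.jpg',
                           style_img_path2='style2.jpg',
                           output_img_path='output',
                           output_width=400,
                           convnet='vgg16',
                           content_weight=0.025,
                           style_weight=1.0,
                           tv_weight=1.0,
                           content_layer='block4_conv2',
                           style_layers=['block1_conv1', 'block2_conv1',
                                         'block3_conv1', 'block4_conv1',
                                         'block5_conv1'],
                           iterations=10)
    styler.style()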