-
Notifications
You must be signed in to change notification settings - Fork 8
/
grad_cam_model.py
207 lines (166 loc) · 8.78 KB
/
grad_cam_model.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
# -*- coding: utf-8 -*-
"""
Utility functions.
Created on Fri Feb 10 19:30:28 2017
@author: M. Waleed Gondal
"""
import tensorflow as tf
import numpy as np
import pickle
from tensorflow.python.framework import ops
from tensorflow.python.ops import gen_nn_ops
@ops.RegisterGradient("GuidedRelu")
def _GuidedReluGrad(op, grad):
    """Gradient function registered under the name 'GuidedRelu'.

    Where the incoming gradient is strictly positive, the result is what
    `gen_nn_ops._relu_grad` returns for that gradient and the op's first
    output; everywhere else the result is zero.

    Parameters
    ----------
    op: Operation
        The forward op whose gradient is being computed; only
        `op.outputs[0]` is read.
    grad: Tensor
        Gradient flowing in with respect to the op's output.

    Returns
    -------
    Tensor
        Same shape as `grad` (the zero branch is built with
        `tf.zeros_like(grad)`).
    """
    # Ops are created in the same order as a single nested expression would
    # create them: comparison, relu gradient, zeros, select.
    incoming_is_positive = 0. < grad
    relu_backprop = gen_nn_ops._relu_grad(grad, op.outputs[0])
    blocked = tf.zeros_like(grad)
    return tf.select(incoming_is_positive, relu_backprop, blocked)
class Grad_CAM():
    """VGG16 classifier wired for Grad-CAM and guided backpropagation.

    Grad-CAM (Gradient-weighted Class Activation Mapping) is a weakly
    supervised localization method presented at a NIPS 2016 workshop:
    'Grad-CAM: Why did you say that? Visual Explanations from Deep Networks
    via Gradient-based Localization', Selvaraju et al.
    https://arxiv.org/abs/1610.02391

    The class builds a VGG16 graph whose variables are initialized from
    pretrained weights. Every ReLU is created under a gradient override map
    that maps 'Relu' to 'GuidedRelu', so a gradient with that name must be
    registered before gradients are taken through the network.

    NOTE(review): this code targets the pre-1.0 TensorFlow API (`tf.split`
    and `tf.concat` take the axis first, `tf.batch_matmul`).
    NOTE(review): op name scopes depend on which statements sit inside each
    variable scope; confirm against any caller that fetches ops by name.

    Parameters
    -----------
    n_labels: int
        An integer representing the number of output classes. It is stored
        on the instance but not consulted when the graph is built.
    weight_file_path: str
        Path handed to `np.load`. The loaded object is indexed by parameter
        name, e.g. 'conv1_1_W', 'conv1_1_b', 'fc6_W', 'fc6_b'.

    Raises
    -------
    ValueError
        If `weight_file_path` is None."""

    def __init__(self, n_labels, weight_file_path = None):
        # Validate first, before touching TensorFlow or the file system.
        # An explicit raise is used because `assert` statements are removed
        # under `python -O`, which would let None reach np.load.
        if weight_file_path is None:
            raise ValueError('No weight file found')
        # Per-channel offsets subtracted in network() in b, g, r order.
        self.image_mean = [103.939, 116.779, 123.68]
        self.n_labels = n_labels
        self.epsilon = 1e-4
        # Graph used to install the 'Relu' -> 'GuidedRelu' gradient override.
        self.g = tf.get_default_graph()
        self.pretrained_weights = np.load(weight_file_path, encoding='bytes')

    def get_conv_weight( self, name ):
        """Accessing weights from the network weights by entry name"""
        return (self.pretrained_weights[name])

    def get_conv_bias( self, name ):
        """Accessing biases from the network weights by entry name"""
        return (self.pretrained_weights[name])

    def conv_layer( self, bottom, name, stride = 1):
        """Implementation of convolutional layer using tensorflow predefined conv function.

        The kernel and bias are read from the pretrained weights under the
        keys `name + '_W'` and `name + '_b'` and used as constant
        initializers for the variables "W" and "b" in the scope `name`.

        Parameters
        ----------
        bottom: Tensor
            A tensor of shape (batchsize, height, width, channels)
        name: String
            A name for the variable scope according to the layer it belongs to.
        stride: Int
            An integer value defining the convolutional layer stride.

        Returns
        --------
        relu : Tensor
            A tensor of shape (batchsize, height, width, channels)"""
        with tf.variable_scope(name):
            # The weights are retrieved according to how they are stored in arrays
            w = self.get_conv_weight(name+'_W')
            b = self.get_conv_bias(name+'_b')
            conv_weights = tf.get_variable(
                "W",
                shape=w.shape,
                initializer=tf.constant_initializer(w)
            )
            conv_biases = tf.get_variable(
                "b",
                shape=b.shape,
                initializer=tf.constant_initializer(b)
            )
            conv = tf.nn.conv2d( bottom, conv_weights, [1,stride,stride,1], padding='SAME')
            bias = tf.nn.bias_add( conv, conv_biases)
            # Create the ReLU under the override so its gradient is looked
            # up under 'GuidedRelu' instead of the default 'Relu'.
            with self.g.gradient_override_map({'Relu': 'GuidedRelu'}):
                relu = tf.nn.relu(bias, name=name)
        return relu

    def network( self, image, is_training = True, dropout = 1.0):
        """ Defines the Standard VGG16 Network, proposed in Very Deep Convolutional Networks for
        Large-Scale Image Recognition Simonyan et al. https://arxiv.org/abs/1409.1556

        The input is scaled by 255, split into three channels along axis 3,
        and reassembled in b, g, r order with `self.image_mean` subtracted.
        NOTE(review): presumably `image` is RGB in [0, 1] -- confirm with caller.

        Parameters
        ----------
        image: Tensor
            A tensor of shape (batchsize, height, width, channels)
        is_training: bool
            Accepted for interface compatibility; not used by this method.
        dropout: float
            Forwarded as the second argument of `tf.nn.dropout` after the
            first two fully connected layers (the keep probability in the
            TensorFlow API this file targets).

        Returns
        --------
        output : Tensor
            The output scores of the last fully connected layer. Shape
            (batchsize, K), where K is the second dimension of 'fc8_W'."""
        # Rebind rather than `*=` so a NumPy input is never modified in place.
        image = image * 255.
        r, g, b = tf.split(3, 3, image)
        image = tf.concat(3,
                          [
                              b-self.image_mean[0],
                              g-self.image_mean[1],
                              r-self.image_mean[2]
                          ])

        relu1_1 = self.conv_layer( image, "conv1_1" )
        relu1_2 = self.conv_layer( relu1_1, "conv1_2" )
        pool1 = tf.nn.max_pool(relu1_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool1')

        relu2_1 = self.conv_layer(pool1, "conv2_1")
        relu2_2 = self.conv_layer(relu2_1, "conv2_2")
        pool2 = tf.nn.max_pool(relu2_2, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool2')

        relu3_1 = self.conv_layer( pool2, "conv3_1")
        relu3_2 = self.conv_layer( relu3_1, "conv3_2")
        relu3_3 = self.conv_layer( relu3_2, "conv3_3")
        pool3 = tf.nn.max_pool(relu3_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool3')

        relu4_1 = self.conv_layer( pool3, "conv4_1")
        relu4_2 = self.conv_layer( relu4_1, "conv4_2")
        relu4_3 = self.conv_layer( relu4_2, "conv4_3")
        pool4 = tf.nn.max_pool(relu4_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool4')

        relu5_1 = self.conv_layer( pool4, "conv5_1")
        relu5_2 = self.conv_layer( relu5_1, "conv5_2")
        relu5_3 = self.conv_layer( relu5_2, "conv5_3")
        pool5 = tf.nn.max_pool(relu5_3, ksize=[1, 2, 2, 1], strides=[1, 2, 2, 1], padding='SAME', name='pool5')

        # Scopes are named fc1..fc3 while the stored weights use fc6..fc8.
        with tf.variable_scope('fc1'):
            w = self.get_conv_weight('fc6_W')
            b = self.get_conv_bias('fc6_b')
            fc_weights = tf.get_variable("W", shape=w.shape, initializer=tf.constant_initializer(w))
            fc_biases = tf.get_variable("b", shape=b.shape, initializer=tf.constant_initializer(b))
            # Flatten every non-batch dimension of pool5 into one axis.
            shape = int(np.prod(pool5.get_shape()[1:]))
            pool5_flat = tf.reshape(pool5, [-1, shape])
            fc1l = tf.nn.bias_add(tf.matmul(pool5_flat, fc_weights), fc_biases)
            with self.g.gradient_override_map({'Relu': 'GuidedRelu'}):
                self.fc1 = tf.nn.relu(fc1l)
            self.fc1 = tf.nn.dropout(self.fc1, dropout)

        with tf.variable_scope('fc2'):
            w = self.get_conv_weight('fc7_W')
            b = self.get_conv_bias('fc7_b')
            fc_weights = tf.get_variable("W", shape=w.shape, initializer=tf.constant_initializer(w))
            fc_biases = tf.get_variable("b", shape=b.shape, initializer=tf.constant_initializer(b))
            fc2l = tf.nn.bias_add(tf.matmul(self.fc1, fc_weights), fc_biases)
            with self.g.gradient_override_map({'Relu': 'GuidedRelu'}):
                self.fc2 = tf.nn.relu(fc2l)
            self.fc2 = tf.nn.dropout(self.fc2, dropout)

        with tf.variable_scope('fc3'):
            w = self.get_conv_weight('fc8_W')
            b = self.get_conv_bias('fc8_b')
            fc_weights = tf.get_variable("W", shape=w.shape, initializer=tf.constant_initializer(w))
            fc_biases = tf.get_variable("b", shape=b.shape, initializer=tf.constant_initializer(b))
            # No ReLU or dropout on the final layer: raw scores are returned.
            out = tf.nn.bias_add(tf.matmul(self.fc2, fc_weights), fc_biases)

        return out

    def get_grad_cam(self, fmaps, gradients, height = 224, width = 224, num_fmaps = 512):
        """ Computes Class Activation Maps using gradients backpropagated from output to the last convolutional
        layer in the network

        Parameters
        -----------
        fmaps: Numpy array of float32
            A batch of feature maps. Shape (batchsize, height, width, channels).
        gradients: Numpy array of float32
            Gradients computed via backpropagation. Shape (height, width, channels).
            They are averaged over axes 0 and 1 to give one weight per channel.
        height: Int
            An integer to which the CAM height is to be upsampled. It should be the height of input image.
        width: Int
            An integer to which the CAM width is to be upsampled. It should be the width of input image
        num_fmaps: Int
            Corresponds to the number of feature maps in the last convolutional layer. In simple terms it's the
            depth (channel count) of `fmaps`.

        Returns
        ---------
        Tensor
            Class Activation Map (CAM), a single channeled, upsampled, weighted sum of last conv filter maps.
            Shape (batchsize, height, width). """
        # One scalar weight per feature map.
        weights = np.mean(gradients, axis=(0,1))
        # Upsample first, then take the weighted sum over channels as a
        # (height*width, num_fmaps) x (num_fmaps, 1) batched matrix product.
        fmaps_resized = tf.image.resize_bilinear(fmaps, [height, width] )
        fmaps_reshaped = tf.reshape(fmaps_resized, [-1, height*width, num_fmaps])
        label_w = tf.reshape( weights, [-1, num_fmaps, 1])
        classmap = tf.batch_matmul(fmaps_reshaped, label_w )
        classmap = tf.reshape( classmap, [-1, height, width] )
        return classmap