-
Notifications
You must be signed in to change notification settings - Fork 15
/
viola_jones.py
296 lines (268 loc) · 13.5 KB
/
viola_jones.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
"""
A Python implementation of the Viola-Jones ensemble classification method described in
Viola, Paul, and Michael Jones. "Rapid object detection using a boosted cascade of simple features." Computer Vision and Pattern Recognition, 2001. CVPR 2001. Proceedings of the 2001 IEEE Computer Society Conference on. Vol. 1. IEEE, 2001.
Works in both Python2 and Python3
"""
import numpy as np
import math
import pickle
from sklearn.feature_selection import SelectPercentile, f_classif
class ViolaJones:
def __init__(self, T = 10):
"""
Args:
T: The number of weak classifiers which should be used
"""
self.T = T
self.alphas = []
self.clfs = []
def train(self, training, pos_num, neg_num):
"""
Trains the Viola Jones classifier on a set of images (numpy arrays of shape (m, n))
Args:
training: An array of tuples. The first element is the numpy array of shape (m, n) representing the image. The second element is its classification (1 or 0)
pos_num: the number of positive samples
neg_num: the number of negative samples
"""
weights = np.zeros(len(training))
training_data = []
print("Computing integral images")
for x in range(len(training)):
training_data.append((integral_image(training[x][0]), training[x][1]))
if training[x][1] == 1:
weights[x] = 1.0 / (2 * pos_num)
else:
weights[x] = 1.0 / (2 * neg_num)
print("Building features")
features = self.build_features(training_data[0][0].shape)
print("Applying features to training examples")
X, y = self.apply_features(features, training_data)
print("Selecting best features")
indices = SelectPercentile(f_classif, percentile=10).fit(X.T, y).get_support(indices=True)
X = X[indices]
features = features[indices]
print("Selected %d potential features" % len(X))
for t in range(self.T):
weights = weights / np.linalg.norm(weights)
weak_classifiers = self.train_weak(X, y, features, weights)
clf, error, accuracy = self.select_best(weak_classifiers, weights, training_data)
beta = error / (1.0 - error)
for i in range(len(accuracy)):
weights[i] = weights[i] * (beta ** (1 - accuracy[i]))
alpha = math.log(1.0/beta)
self.alphas.append(alpha)
self.clfs.append(clf)
print("Chose classifier: %s with accuracy: %f and alpha: %f" % (str(clf), len(accuracy) - sum(accuracy), alpha))
def train_weak(self, X, y, features, weights):
"""
Finds the optimal thresholds for each weak classifier given the current weights
Args:
X: A numpy array of shape (len(features), len(training_data)). Each row represents the value of a single feature for each training example
y: A numpy array of shape len(training_data). The ith element is the classification of the ith training example
features: an array of tuples. Each tuple's first element is an array of the rectangle regions which positively contribute to the feature. The second element is an array of rectangle regions negatively contributing to the feature
weights: A numpy array of shape len(training_data). The ith element is the weight assigned to the ith training example
Returns:
An array of weak classifiers
"""
total_pos, total_neg = 0, 0
for w, label in zip(weights, y):
if label == 1:
total_pos += w
else:
total_neg += w
classifiers = []
total_features = X.shape[0]
for index, feature in enumerate(X):
if len(classifiers) % 1000 == 0 and len(classifiers) != 0:
print("Trained %d classifiers out of %d" % (len(classifiers), total_features))
applied_feature = sorted(zip(weights, feature, y), key=lambda x: x[1])
pos_seen, neg_seen = 0, 0
pos_weights, neg_weights = 0, 0
min_error, best_feature, best_threshold, best_polarity = float('inf'), None, None, None
for w, f, label in applied_feature:
error = min(neg_weights + total_pos - pos_weights, pos_weights + total_neg - neg_weights)
if error < min_error:
min_error = error
best_feature = features[index]
best_threshold = f
best_polarity = 1 if pos_seen > neg_seen else -1
if label == 1:
pos_seen += 1
pos_weights += w
else:
neg_seen += 1
neg_weights += w
clf = WeakClassifier(best_feature[0], best_feature[1], best_threshold, best_polarity)
classifiers.append(clf)
return classifiers
def build_features(self, image_shape):
"""
Builds the possible features given an image shape
Args:
image_shape: a tuple of form (height, width)
Returns:
an array of tuples. Each tuple's first element is an array of the rectangle regions which positively contribute to the feature. The second element is an array of rectangle regions negatively contributing to the feature
"""
height, width = image_shape
features = []
for w in range(1, width+1):
for h in range(1, height+1):
i = 0
while i + w < width:
j = 0
while j + h < height:
#2 rectangle features
immediate = RectangleRegion(i, j, w, h)
right = RectangleRegion(i+w, j, w, h)
if i + 2 * w < width: #Horizontally Adjacent
features.append(([right], [immediate]))
bottom = RectangleRegion(i, j+h, w, h)
if j + 2 * h < height: #Vertically Adjacent
features.append(([immediate], [bottom]))
right_2 = RectangleRegion(i+2*w, j, w, h)
#3 rectangle features
if i + 3 * w < width: #Horizontally Adjacent
features.append(([right], [right_2, immediate]))
bottom_2 = RectangleRegion(i, j+2*h, w, h)
if j + 3 * h < height: #Vertically Adjacent
features.append(([bottom], [bottom_2, immediate]))
#4 rectangle features
bottom_right = RectangleRegion(i+w, j+h, w, h)
if i + 2 * w < width and j + 2 * h < height:
features.append(([right, bottom], [immediate, bottom_right]))
j += 1
i += 1
return np.array(features)
def select_best(self, classifiers, weights, training_data):
"""
Selects the best weak classifier for the given weights
Args:
classifiers: An array of weak classifiers
weights: An array of weights corresponding to each training example
training_data: An array of tuples. The first element is the numpy array of shape (m, n) representing the integral image. The second element is its classification (1 or 0)
Returns:
A tuple containing the best classifier, its error, and an array of its accuracy
"""
best_clf, best_error, best_accuracy = None, float('inf'), None
for clf in classifiers:
error, accuracy = 0, []
for data, w in zip(training_data, weights):
correctness = abs(clf.classify(data[0]) - data[1])
accuracy.append(correctness)
error += w * correctness
error = error / len(training_data)
if error < best_error:
best_clf, best_error, best_accuracy = clf, error, accuracy
return best_clf, best_error, best_accuracy
def apply_features(self, features, training_data):
"""
Maps features onto the training dataset
Args:
features: an array of tuples. Each tuple's first element is an array of the rectangle regions which positively contribute to the feature. The second element is an array of rectangle regions negatively contributing to the feature
training_data: An array of tuples. The first element is the numpy array of shape (m, n) representing the integral image. The second element is its classification (1 or 0)
Returns:
X: A numpy array of shape (len(features), len(training_data)). Each row represents the value of a single feature for each training example
y: A numpy array of shape len(training_data). The ith element is the classification of the ith training example
"""
X = np.zeros((len(features), len(training_data)))
y = np.array(list(map(lambda data: data[1], training_data)))
i = 0
for positive_regions, negative_regions in features:
feature = lambda ii: sum([pos.compute_feature(ii) for pos in positive_regions]) - sum([neg.compute_feature(ii) for neg in negative_regions])
X[i] = list(map(lambda data: feature(data[0]), training_data))
i += 1
return X, y
def classify(self, image):
"""
Classifies an image
Args:
image: A numpy 2D array of shape (m, n) representing the image
Returns:
1 if the image is positively classified and 0 otherwise
"""
total = 0
ii = integral_image(image)
for alpha, clf in zip(self.alphas, self.clfs):
total += alpha * clf.classify(ii)
return 1 if total >= 0.5 * sum(self.alphas) else 0
def save(self, filename):
"""
Saves the classifier to a pickle
Args:
filename: The name of the file (no file extension necessary)
"""
with open(filename+".pkl", 'wb') as f:
pickle.dump(self, f)
@staticmethod
def load(filename):
"""
A static method which loads the classifier from a pickle
Args:
filename: The name of the file (no file extension necessary)
"""
with open(filename+".pkl", 'rb') as f:
return pickle.load(f)
class WeakClassifier:
def __init__(self, positive_regions, negative_regions, threshold, polarity):
"""
Args:
positive_regions: An array of RectangleRegions which positively contribute to a feature
negative_regions: An array of RectangleRegions which negatively contribute to a feature
threshold: The threshold for the weak classifier
polarity: The polarity of the weak classifier
"""
self.positive_regions = positive_regions
self.negative_regions = negative_regions
self.threshold = threshold
self.polarity = polarity
def classify(self, x):
"""
Classifies an integral image based on a feature f and the classifiers threshold and polarity
Args:
x: A 2D numpy array of shape (m, n) representing the integral image
Returns:
1 if polarity * feature(x) < polarity * threshold
0 otherwise
"""
feature = lambda ii: sum([pos.compute_feature(ii) for pos in self.positive_regions]) - sum([neg.compute_feature(ii) for neg in self.negative_regions])
return 1 if self.polarity * feature(x) < self.polarity * self.threshold else 0
def __str__(self):
return "Weak Clf (threshold=%d, polarity=%d, %s, %s" % (self.threshold, self.polarity, str(self.positive_regions), str(self.negative_regions))
class RectangleRegion:
def __init__(self, x, y, width, height):
self.x = x
self.y = y
self.width = width
self.height = height
def compute_feature(self, ii):
"""
Computes the value of the Rectangle Region given the integral image
Args:
integral image : numpy array, shape (m, n)
x: x coordinate of the upper left corner of the rectangle
y: y coordinate of the upper left corner of the rectangle
width: width of the rectangle
height: height of the rectangle
"""
return ii[self.y+self.height][self.x+self.width] + ii[self.y][self.x] - (ii[self.y+self.height][self.x]+ii[self.y][self.x+self.width])
def __str__(self):
return "(x= %d, y= %d, width= %d, height= %d)" % (self.x, self.y, self.width, self.height)
def __repr__(self):
return "RectangleRegion(%d, %d, %d, %d)" % (self.x, self.y, self.width, self.height)
def integral_image(image):
"""
Computes the integral image representation of a picture. The integral image is defined as following:
1. s(x, y) = s(x, y-1) + i(x, y), s(x, -1) = 0
2. ii(x, y) = ii(x-1, y) + s(x, y), ii(-1, y) = 0
Where s(x, y) is a cumulative row-sum, ii(x, y) is the integral image, and i(x, y) is the original image.
The integral image is the sum of all pixels above and left of the current pixel
Args:
image : an numpy array with shape (m, n)
"""
ii = np.zeros(image.shape)
s = np.zeros(image.shape)
for y in range(len(image)):
for x in range(len(image[y])):
s[y][x] = s[y-1][x] + image[y][x] if y-1 >= 0 else image[y][x]
ii[y][x] = ii[y][x-1]+s[y][x] if x-1 >= 0 else s[y][x]
return ii