-
Notifications
You must be signed in to change notification settings - Fork 0
/
detector.py
371 lines (295 loc) · 16.2 KB
/
detector.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
import os
import cv2
import matplotlib.image as mpimg
import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from scipy.ndimage import label
import hog as h
import utils as u
def slide_window(img, x_start_stop=(None, None), y_start_stop=(None, None), xy_window=(64, 64), xy_overlap=(0.5, 0.5)):
    """Generate a list of sliding windows over an image.

    The start/stop bounds are resolved into local variables, so neither the
    default arguments nor a caller-supplied sequence is ever modified.

    Args:
        img (np.array): The image over which windows will slide. Only
            ``img.shape[0]`` (height) and ``img.shape[1]`` (width) are read.
        x_start_stop (sequence, optional): The start and stop x positions for
            the window. A ``None`` entry means 0 (start) or the image width
            (stop). Defaults to (None, None).
        y_start_stop (sequence, optional): The start and stop y positions for
            the window. A ``None`` entry means 0 (start) or the image height
            (stop). Defaults to (None, None).
        xy_window (tuple, optional): Window size in (width, height) format.
            Defaults to (64, 64).
        xy_overlap (tuple, optional): Fraction of window overlap in (x, y)
            format. Defaults to (0.5, 0.5).

    Returns:
        list: List of window positions, each position is a tuple of top-left
        and bottom-right coordinates: ((startx, starty), (endx, endy)).

    Raises:
        ZeroDivisionError: If the overlap makes the step size truncate to 0
            pixels (for example an overlap of 1.0).
    """
    # Resolve the search bounds locally; writing back into the arguments would
    # leak the first image's size into later calls and into the caller's data.
    x_start, x_stop = x_start_stop[0], x_start_stop[1]
    y_start, y_stop = y_start_stop[0], y_start_stop[1]
    if x_start is None:
        x_start = 0
    if x_stop is None:
        x_stop = img.shape[1]
    if y_start is None:
        y_start = 0
    if y_stop is None:
        y_stop = img.shape[0]

    # Span of the region to be searched
    xspan = x_stop - x_start
    yspan = y_stop - y_start

    # Number of pixels per step in x/y
    nx_pix_per_step = int(xy_window[0] * (1 - xy_overlap[0]))
    ny_pix_per_step = int(xy_window[1] * (1 - xy_overlap[1]))

    # Number of windows in x/y; the buffer is the overlapping part of a window
    # that does not contribute a new step.
    nx_buffer = int(xy_window[0] * xy_overlap[0])
    ny_buffer = int(xy_window[1] * xy_overlap[1])
    nx_windows = int((xspan - nx_buffer) / nx_pix_per_step)
    ny_windows = int((yspan - ny_buffer) / ny_pix_per_step)

    window_list = []
    # Row-major traversal: all x positions for the first row, then the next row
    for ys in range(ny_windows):
        starty = ys * ny_pix_per_step + y_start
        endy = starty + xy_window[1]
        for xs in range(nx_windows):
            startx = xs * nx_pix_per_step + x_start
            endx = startx + xy_window[0]
            window_list.append(((startx, starty), (endx, endy)))
    return window_list
def draw_boxes(img, bboxes, color=(0, 0, 255), thick=6):
    """Return a copy of an image with the given bounding boxes drawn on it.

    Args:
        img (np.array): The image on which boxes will be drawn. It is copied,
            not modified.
        bboxes (list): List of bounding boxes, each box is represented by
            top-left and bottom-right coordinates.
        color (tuple or str, optional): Color of the boxes. Passing the string
            'random' draws every box in a freshly sampled color.
            Defaults to (0, 0, 255).
        thick (int, optional): Thickness of the box lines. Defaults to 6.

    Returns:
        np.array: Copy of the image with the boxes drawn.
    """
    canvas = np.copy(img)
    # 'random' is a sentinel: once seen, a new color is sampled for each box
    pick_random = color == 'random'
    for box in bboxes:
        if pick_random:
            color = (np.random.randint(0,255), np.random.randint(0,255), np.random.randint(0,255))
        # box[0] is the top-left corner, box[1] the bottom-right corner
        cv2.rectangle(canvas, box[0], box[1], color, thick)
    return canvas
def view_windows(image, window_scale, x_start_stop, y_start_stop, xy_window, xy_overlap, color_values):
    """Plot the sliding-window grid of every scale on top of an image.

    Args:
        image (np.array): The image on which windows will be visualized.
        window_scale (tuple): The scales of the windows to slide over the image.
        x_start_stop (tuple): The start and stop positions in the x-dimension to search for windows.
        y_start_stop (list): List of start and stop positions in the y-dimension, one entry per scale.
        xy_window (tuple): Base window size in (width, height) format; it is multiplied by each scale.
        xy_overlap (tuple): Fraction of window overlap in (x, y) format.
        color_values (list): List of box colors, one entry per scale.

    Returns:
        None: The windows are drawn with matplotlib and the window count for
        each scale is printed.
    """
    plt.subplots(figsize=(10,10))
    for idx, scale in enumerate(window_scale):
        y_bounds = y_start_stop[idx]
        # Scale the base window and truncate to whole pixels
        scaled_window = [int(side*scale) for side in xy_window]
        boxes = slide_window(image, x_start_stop=x_start_stop, y_start_stop=y_bounds,
                             xy_window=scaled_window, xy_overlap=xy_overlap)
        # Boxes accumulate: each scale is drawn on top of the previous result
        image = draw_boxes(image, boxes, color_values[idx])
        print("Number of windows for scale {}: {}".format(scale, len(boxes)))
    plt.imshow(image)
    plt.title("Test Image")
    plt.show()
def find_cars(img, x_start_stop, ystart, ystop, scale, model, X_scaler, orientations, pix_per_cell, cell_per_block, spatial_size, hist_bins, vis_bboxes=False):
    """Find cars in an image using a sliding window approach and a trained classifier.

    HOG features are computed once per channel for the whole search region and
    then sub-sampled per window; a color histogram of each window patch is
    stacked in front of the HOG features before scaling and classification.

    Args:
        img (np.array): The image to be searched for cars.
        x_start_stop (tuple): The start and stop positions of the search in the x-dimension.
            A ``None`` start is treated as 0.
        ystart (int): The start position of the search in the y-dimension.
        ystop (int): The stop position of the search in the y-dimension.
        scale (float): The scale of the search window. The search region is shrunk by
            this factor, so the effective window is ``64 * scale`` pixels wide.
        model (classifier): The trained classifier used to predict if a window contains a car.
        X_scaler (scaler): The scaler used to normalize the feature vectors.
        orientations (int): Number of HOG orientations.
        pix_per_cell (int): Number of pixels per cell for the HOG features.
        cell_per_block (int): Number of cells per block for the HOG features.
        spatial_size (tuple): Spatial size for the spatial binning of the color channels.
            Currently unused: spatial binning is not part of the feature vector.
        hist_bins (int): Number of histogram bins for color histogram features.
        vis_bboxes (bool, optional): If True, returns all searched boxes regardless of
            classifier prediction. Defaults to False.

    Returns:
        list: List of bounding boxes ((x1, y1), (x2, y2)) in the coordinate
        frame of ``img`` where cars were detected.
    """
    # The search region is cropped in x as well as in y, so boxes must be
    # shifted back by the left edge of the crop (not by a constant 0).
    xstart = x_start_stop[0] or 0

    img_tosearch = img[ystart:ystop, x_start_stop[0]:x_start_stop[1], :]
    ctrans_tosearch = h.convert_RGB_color(img_tosearch, color_space='RGB')
    if scale != 1:
        imshape = ctrans_tosearch.shape
        ctrans_tosearch = cv2.resize(ctrans_tosearch, (int(imshape[1]/scale), int(imshape[0]/scale)))

    ch1 = ctrans_tosearch[:, :, 0]
    ch2 = ctrans_tosearch[:, :, 1]
    ch3 = ctrans_tosearch[:, :, 2]

    # Number of HOG blocks that fit in the (scaled) search region
    nxblocks = (ch1.shape[1] // pix_per_cell) - cell_per_block + 1
    nyblocks = (ch1.shape[0] // pix_per_cell) - cell_per_block + 1

    # 64 was the original sampling rate, with 8 cells and 8 pix per cell
    window = 64
    nblocks_per_window = (window // pix_per_cell) - cell_per_block + 1
    cells_per_step = 2  # Instead of overlap, define how many cells to step
    # NOTE(review): without a trailing "+ 1" the last window position that
    # would still fit at the right/bottom edge is not visited - confirm intent.
    nxsteps = (nxblocks - nblocks_per_window) // cells_per_step
    nysteps = (nyblocks - nblocks_per_window) // cells_per_step

    # Compute individual channel HOG features for the entire search region
    hog1 = h.get_hog_features(ch1, orientations=orientations, pixels_per_cell=pix_per_cell, cell_per_block=cell_per_block, feature_vector=False)
    hog2 = h.get_hog_features(ch2, orientations=orientations, pixels_per_cell=pix_per_cell, cell_per_block=cell_per_block, feature_vector=False)
    hog3 = h.get_hog_features(ch3, orientations=orientations, pixels_per_cell=pix_per_cell, cell_per_block=cell_per_block, feature_vector=False)

    # Window edge length in original-image pixels
    win_draw = int(window*scale)

    rectangles = []
    for xb in range(nxsteps):
        xpos = xb*cells_per_step
        xleft = xpos*pix_per_cell
        for yb in range(nysteps):
            ypos = yb*cells_per_step
            ytop = ypos*pix_per_cell

            # Extract HOG for this patch
            hog_feat1 = hog1[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat2 = hog2[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_feat3 = hog3[ypos:ypos+nblocks_per_window, xpos:xpos+nblocks_per_window].ravel()
            hog_features = np.hstack((hog_feat1, hog_feat2, hog_feat3)).reshape(1, -1)

            # Extract the image patch
            subimg = cv2.resize(ctrans_tosearch[ytop:ytop+window, xleft:xleft+window], (64,64))
            hist_features = h.color_hist(subimg, bins_nb=hist_bins).reshape(1, -1)

            # Feature order is histogram first, then HOG; presumably this
            # matches how X_scaler and model were fitted - verify against training.
            test_features = X_scaler.transform(np.hstack((hist_features, hog_features)))
            test_prediction = model.predict(test_features)

            if test_prediction == 1 or vis_bboxes:
                # Map the window back to original-image coordinates
                xbox_left = int(xleft*scale)
                ytop_draw = int(ytop*scale)
                rectangles.append(((xbox_left+xstart, ytop_draw+ystart), (xbox_left+win_draw+xstart, ytop_draw+win_draw+ystart)))
    return rectangles
def apply_threshold(heatmap, threshold):
    """Zero out every heatmap cell whose value does not exceed a threshold.

    The heatmap is modified in place and the same array is returned.

    Args:
        heatmap (np.array): The heatmap on which the threshold is to be applied.
        threshold (int): The threshold value; cells less than or equal to it are cleared.

    Returns:
        np.array: The thresholded heatmap (the input array itself).
    """
    too_cold = heatmap <= threshold
    heatmap[too_cold] = 0
    return heatmap
def apply_adaptive_threshold(heatmap, threshold_ratio):
    """Zero out heatmap cells that do not exceed a fraction of the hottest cell.

    The cutoff is ``max(heatmap) * threshold_ratio``. The heatmap is modified
    in place and the same array is returned.

    Args:
        heatmap (np.array): The heatmap on which the threshold is to be applied.
        threshold_ratio (float): The ratio of the maximum heatmap value to be used as the threshold.

    Returns:
        np.array: The thresholded heatmap (the input array itself).
    """
    cutoff = np.amax(heatmap) * threshold_ratio
    below_cutoff = heatmap <= cutoff
    heatmap[below_cutoff] = 0
    return heatmap
def get_rectangles(image, window_scale, x_start_stop, y_start_stop, model, X_scaler, orientations, pix_per_cell, cell_per_block, spatial_size, hist_bins):
    """Collect the car detections of every window scale into one flat list.

    Args:
        image (np.array): The image on which cars are to be detected.
        window_scale (tuple): The scales of the windows to slide over the image.
        x_start_stop (tuple): The start and stop positions in the x-dimension to search for windows.
        y_start_stop (list): List of (start, stop) positions in the y-dimension, one entry per scale.
        model (classifier): The trained classifier used to detect cars.
        X_scaler (scaler): The scaler used to normalize the feature vectors.
        orientations (int): Number of HOG orientations.
        pix_per_cell (int): Number of pixels per cell for the HOG features.
        cell_per_block (int): Number of cells per block for the HOG features.
        spatial_size (tuple): Spatial size for the spatial binning of the color channels.
        hist_bins (int): Number of histogram bins for color histogram features.

    Returns:
        list: Flat list of bounding boxes where cars were detected, in scale order.
    """
    detections = []
    for idx, scale in enumerate(window_scale):
        y_bounds = y_start_stop[idx]
        found = find_cars(image, x_start_stop, y_bounds[0], y_bounds[1], scale, model, X_scaler,
                          orientations, pix_per_cell, cell_per_block, spatial_size, hist_bins)
        # Extending with an empty result is a no-op, so no emptiness check is needed
        detections.extend(found)
    return detections
def add_heat(heatmap, bbox_list):
    """Add one unit of heat to every heatmap pixel covered by each bounding box.

    Pixels covered by several boxes are incremented once per box. The heatmap
    is modified in place and the same array is returned.

    Args:
        heatmap (np.array): The heatmap to which heat will be added.
        bbox_list (list): List of bounding boxes, each of the form ((x1, y1), (x2, y2)).

    Returns:
        np.array: Updated heatmap with added heat (the input array itself).
    """
    for box in bbox_list:
        top_left = box[0]
        bottom_right = box[1]
        # Arrays are indexed [row, column], i.e. [y, x]
        rows = slice(top_left[1], bottom_right[1])
        cols = slice(top_left[0], bottom_right[0])
        heatmap[rows, cols] += 1
    return heatmap
def draw_labeled_bboxes(img, labels):
    """Draw bounding boxes on an image based on labeled regions.

    One box is fitted around the pixels of each label. Boxes whose area
    (width * height) is not larger than 40 * 40 are discarded; the remaining
    boxes are drawn in green on a copy of the image.

    Args:
        img (np.array): The image on which bounding boxes are to be drawn. It is
            copied, not modified.
        labels (tuple): Labeled regions in the format (labeled_array, number_of_labels).

    Returns:
        list: List of bounding boxes drawn, each as [x, y, width, height] with
            plain Python ints.
        np.array: Copy of the image with the bounding boxes drawn.
    """
    min_area = 40 * 40  # boxes at or below this area are discarded

    img_copy = np.copy(img)
    result_rectangles = []
    for car_number in range(1, labels[1]+1):
        # Find pixels with each car_number label value
        nonzero = (labels[0] == car_number).nonzero()
        # Row indices are y values, column indices are x values
        nonzeroy = np.array(nonzero[0])
        nonzerox = np.array(nonzero[1])
        # Cast to plain ints: the coordinates are handed to OpenCV and to
        # callers, neither of which should have to deal with NumPy scalars.
        x_min, y_min = int(np.min(nonzerox)), int(np.min(nonzeroy))
        x_max, y_max = int(np.max(nonzerox)), int(np.max(nonzeroy))
        # Named box_w/box_h so the module alias `h` is not shadowed
        box_w = x_max - x_min
        box_h = y_max - y_min
        if box_w * box_h > min_area:
            result_rectangles.append([x_min, y_min, box_w, box_h])
            # Draw the box on the image
            cv2.rectangle(img_copy, (x_min, y_min), (x_max, y_max), (0, 255, 0), 6)
    return result_rectangles, img_copy
def predict_on_test_images(test_files, data_path, window_scale, x_start_stop, y_start_stop, model, X_scaler, orientations, pix_per_cell, cell_per_block, spatial_size, hist_bins, threshold_value):
    """Predict car locations on a set of test images using a trained classifier and save the predictions to a CSV.

    For every image: run the multi-scale window search, build a heatmap from
    the raw detections, threshold a copy of it, label the remaining regions
    and fit one box per region. The boxes are converted to a run-length
    encoded mask, and all rows are written to ``sample_submission.csv`` in the
    current working directory.

    Args:
        test_files (list): List of test image file names.
        data_path (str): Path to the directory containing test images.
        window_scale (tuple): The scales of the windows to slide over the images.
        x_start_stop (tuple): The start and stop positions in the x-dimension to search for windows.
        y_start_stop (list): List of start and stop positions in the y-dimension for each scale.
        model (classifier): The trained classifier used to detect cars.
        X_scaler (scaler): The scaler used to normalize the feature vectors.
        orientations (int): Number of HOG orientations.
        pix_per_cell (int): Number of pixels per cell for the HOG features.
        cell_per_block (int): Number of cells per block for the HOG features.
        spatial_size (tuple): Spatial size for the spatial binning of the color channels.
        hist_bins (int): Number of histogram bins for color histogram features.
        threshold_value (float): Adaptive threshold ratio for heatmap processing.

    Returns:
        tuple: Contains lists of result images, bounding boxes, heatmap images
        (unthresholded), thresholded heatmap images, and result images with
        all bounding boxes.
    """
    result_images = []
    result_boxes = []
    heatmap_images = []
    threshold_images = []
    result_img_all_boxes = []
    rows = []

    for file_name in test_files:
        img = mpimg.imread(os.path.join(data_path, file_name))

        # Raw detections from every scale, before any filtering
        detections = get_rectangles(img, window_scale, x_start_stop, y_start_stop, model, X_scaler, orientations, pix_per_cell, cell_per_block, spatial_size, hist_bins)
        result_img_all_boxes.append(draw_boxes(img, detections, color=(0, 0, 255), thick=3))

        heatmap_image = add_heat(np.zeros_like(img[:, :, 0]), detections)
        heatmap_images.append(heatmap_image)

        # Threshold a copy: apply_adaptive_threshold zeroes cells in place, so
        # thresholding the stored heatmap would overwrite the raw heatmap that
        # was just appended to heatmap_images.
        threshold_image = apply_adaptive_threshold(np.copy(heatmap_image), threshold_value)
        threshold_images.append(threshold_image)

        labels = label(threshold_image)
        car_boxes, result_image = draw_labeled_bboxes(img, labels)
        result_images.append(result_image)
        result_boxes.append(car_boxes)

        # Convert bounding boxes to binary mask and produce run-length encoding
        rle = u.run_length_encoding(u.bounding_boxes_to_mask(car_boxes, img.shape[0], img.shape[1]))
        rows.append(['test/' + file_name, rle])

    # Create dataframe and save to csv file
    df_prediction = pd.DataFrame(columns=['Id', 'Predicted'], data=rows).set_index('Id')
    df_prediction.to_csv('sample_submission.csv')
    return result_images, result_boxes, heatmap_images, threshold_images, result_img_all_boxes