detector.py
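
"""Run a trained face detector on a single image.

Resolves a checkpoint, decodes the network's anchor-relative outputs into
bounding boxes, applies non-maximum suppression, and can draw the
resulting boxes onto the image.
"""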
import argparse
import os
import sys
import cv2
import numpy as np
import torch
from anchor import generate_anchors, anchors_of_feature_map
from config import Config
from model import Net
from utils import change_coordinate, change_coordinate_inv, seek_model, save_bounding_boxes_image, nms
from evaluation_metrics import softmax
device = torch.device(Config.DEVICE)


class Detector(object):

    def __init__(self, model, image_size=Config.IMAGE_SIZE,
                 threshold=Config.PREDICTION_THRESHOLD):
        if isinstance(model, str):
            # resolve an epoch number / file name to a checkpoint path;
            # map_location lets CPU-only machines load GPU-trained weights
            checkpoint = torch.load(seek_model(model), map_location=device)
            self.model = Net().to(device)
            self.model.load_state_dict(checkpoint['state_dict'], strict=True)
        else:
            self.model = model
        self.model.eval()

        self.threshold = threshold
        self.image_size = image_size
    def convert_predictions(self, predictions, path, anchors):
        # `path` is accepted for interface parity with forward() but unused
        # drop background predictions (class 0)
        scores, klass = torch.max(softmax(predictions[:, 4:]), dim=1)
        inds = klass != 0
        scores, klass, predictions, anchors = \
            scores[inds], klass[inds], predictions[inds], anchors[inds]
        if len(scores) == 0:
            return None

        # sort by score and keep the top 200 candidates
        scores, inds = torch.sort(scores, descending=True)
        klass, predictions, anchors = klass[inds], predictions[inds], anchors[inds]
        # inds = scores > self.threshold
        # scores, klass, predictions, anchors = \
        #     scores[inds], klass[inds], predictions[inds], anchors[inds]
        scores, klass, predictions, anchors = \
            scores[:200], klass[:200], predictions[:200], anchors[:200]
        if len(predictions) == 0:
            return None

        # decode anchor-relative offsets into center-format boxes:
        # centers shift by offset * anchor size, sizes scale by exp(offset)
        anchors = anchors.to(device).float()
        x = predictions[:, 0] * anchors[:, 2] + anchors[:, 0]
        y = predictions[:, 1] * anchors[:, 3] + anchors[:, 1]
        w = torch.exp(predictions[:, 2]) * anchors[:, 2]
        h = torch.exp(predictions[:, 3]) * anchors[:, 3]

        bounding_boxes = torch.stack((x, y, w, h), dim=1).detach().cpu().numpy()
        bounding_boxes = change_coordinate_inv(bounding_boxes)

        scores = scores.detach().cpu().numpy()
        klass = klass.detach().cpu().numpy()
        bboxes_scores = np.hstack(
            (bounding_boxes, np.stack((scores, klass), axis=1))
        )

        # non-maximum suppression on the decoded boxes
        keep = nms(bboxes_scores)
        return bboxes_scores[keep]
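
    # Worked decode example (illustrative numbers, not from a real
    # checkpoint): for an anchor (cx=32, cy=32, w=16, h=16) and a raw
    # regression output (0.5, -0.25, 0.0, ln 2), the decoded box is
    #   x = 0.5 * 16 + 32   = 40
    #   y = -0.25 * 16 + 32 = 28
    #   w = e^0 * 16        = 16
    #   h = e^(ln 2) * 16   = 32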

    def forward(self, batched_data):
        """Predict on a batch produced by the dataset.

        Args:
            batched_data: a batch yielded by the dataloader; index 0 holds
                the images, index 2 is passed through as the image path.

        Returns:
            A list with one entry per image: decoded bounding boxes with
            scores and classes, or None if nothing was detected.
        """
        images = batched_data[0].permute(0, 3, 1, 2).to(device).float()
        # the model returns interleaved per-level outputs
        # [reg_0, cls_0, reg_1, cls_1, ...]; zip regroups them per image
        predictions = list(zip(*list(self.model(images))))

        result = []
        for i, prediction in enumerate(predictions):
            prediction = list(prediction)
            anchors = []
            for k, feature_map_prediction in enumerate(prediction):
                # build the anchors of this feature map once, on the
                # regression (even-indexed) entries
                if (k % 2) == 0:
                    anchors.append(np.array(anchors_of_feature_map(
                        Config.ANCHOR_STRIDE[k // 2],
                        Config.ANCHOR_SIZE[k // 2],
                        feature_map_prediction.size()[1:])))
                # flatten (C, H, W) to (H*W, C)
                prediction[k] = feature_map_prediction \
                    .view(feature_map_prediction.size()[0], -1) \
                    .permute(1, 0).contiguous()
            reg_preds = torch.cat(prediction[::2])
            cls_preds = torch.cat(prediction[1::2])
            anchors = torch.tensor(np.vstack(anchors))
            result.append(self.convert_predictions(
                torch.cat((reg_preds, cls_preds), dim=1),
                batched_data[2][i], anchors))
        return result
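
    # Sketch of batched use (hypothetical names; assumes the dataset yields
    # tuples whose index 0 holds images and index 2 holds image paths):
    #
    #   loader = torch.utils.data.DataLoader(dataset, batch_size=4)
    #   detector = Detector('30')
    #   for batch in loader:
    #       boxes_per_image = detector.forward(batch)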

    def infer(self, image):
        image = cv2.imread(image)
        # subtract the per-channel BGR mean in float to avoid uint8 wraparound
        image = image.astype(np.float32) - np.array([104, 117, 123],
                                                    dtype=np.float32)
        _input = torch.tensor(image).permute(2, 0, 1).float() \
            .to(device).unsqueeze(0)
        # list() so the per-level outputs can be flattened in place below
        predictions = list(self.model(_input))

        # flatten predictions and collect the matching anchors
        anchors = []
        for index, prediction in enumerate(predictions):
            if (index % 2) == 0:
                anchors.append(np.array(anchors_of_feature_map(
                    Config.ANCHOR_STRIDE[index // 2],
                    Config.ANCHOR_SIZE[index // 2],
                    prediction.size()[2:]
                )))
            predictions[index] = prediction.squeeze() \
                .view(prediction.size()[1], -1).permute(1, 0)
        anchors = torch.tensor(np.vstack(anchors))
        reg_preds = torch.cat(predictions[::2])
        cls_preds = torch.cat(predictions[1::2])
        return self.convert_predictions(
            torch.cat((reg_preds, cls_preds), dim=1), None, anchors)
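
    # Minimal single-image sketch (hypothetical file name; '30' stands for
    # an epoch number that seek_model can resolve):
    #
    #   detector = Detector('30')
    #   boxes = detector.infer('face.jpg')  # per row: box, score, class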


def main(args):
    print('predicted bounding boxes of faces:')
    bboxes = Detector(args.model).infer(args.image)
    if bboxes is not None:
        # cap the number of reported predictions at --keep
        bboxes = bboxes[:args.keep]
    print(bboxes)
    if args.save_to and bboxes is not None:
        save_bounding_boxes_image(args.image, bboxes, args.save_to)


if __name__ == '__main__':
    parser = argparse.ArgumentParser(description='predictor')
    parser.add_argument('--image', type=str,
                        help='image to be predicted')
    parser.add_argument('--model', type=str,
                        help='model to use; can be an epoch number, a model '
                             'file name, or an absolute path to a model file')
    parser.add_argument('--keep', type=int, default=150,
                        help='how many predictions to keep, default: 150')
    parser.add_argument('--save_to', type=str,
                        help='save the image with bboxes to a file')
    args = parser.parse_args()
    main(args)
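
# Example invocation (hypothetical file names):
#   python detector.py --model 80 --image test.jpg --save_to out.jpg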