# MaskDetector.py
# Live webcam instance segmentation with a YOLOv7-mask checkpoint.
import cv2
import matplotlib.pyplot as plt
import torch
import yaml
from torchvision import transforms
import numpy as np
from utils.datasets import letterbox
from utils.general import non_max_suppression_mask_conf
from detectron2.modeling.poolers import ROIPooler
from detectron2.structures import Boxes
from detectron2.utils.memory import retry_if_cuda_oom
from detectron2.layers import paste_masks_in_image
def model_loader(model_name):
    """Load a pickled checkpoint and return its network ready for inference.

    Args:
        model_name: Path of a checkpoint file whose top-level object is a
            mapping that stores the network under the key ``'model'``.

    Returns:
        The network in eval mode: float16 on ``cuda:0`` when CUDA is
        available, otherwise float32 on the CPU.
    """
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    # SECURITY: the checkpoint holds a whole pickled module, so loading it can
    # execute arbitrary code. Only load checkpoints from a trusted source.
    # map_location lets a checkpoint saved on a GPU load on a CPU-only host.
    try:
        # Recent torch releases default to weights_only=True, which rejects
        # full-model pickles; request the legacy behaviour explicitly.
        weights = torch.load(model_name, map_location=device, weights_only=False)
    except TypeError:
        # Older torch releases do not know the weights_only argument.
        weights = torch.load(model_name, map_location=device)

    model = weights['model']
    model.eval()
    if device.type != "cpu":
        return model.half().to(device)
    return model.float().to(device)
def inference(image, model):
    """Run the mask model on one frame and return it with detections drawn.

    Args:
        image: Colour image array, e.g. a frame read from
            ``cv2.VideoCapture``. It is letterboxed to 640 px before use.
        model: Network as returned by ``model_loader``. Calling it must yield
            a mapping with the keys ``'test'``, ``'attn'``, ``'bases'`` and
            ``'sem'``, and it must expose a ``pooler_scale`` attribute.

    Returns:
        The letterboxed frame as a uint8 array with a half-transparent mask
        and a box drawn for every kept detection. ``cv2.COLOR_RGB2BGR`` is
        applied to the frame, so its first and third channels are swapped
        relative to the input (a BGR camera frame comes back in RGB order).
    """
    conf_thres = 0.25
    device = torch.device("cuda:0" if torch.cuda.is_available() else "cpu")

    with open('data/hyp.scratch.mask.yaml') as f:
        hyp = yaml.load(f, Loader=yaml.FullLoader)
    mask_res = hyp['mask_resolution']

    # Letterbox, convert to a float tensor in [0, 1] and add the batch axis.
    image = letterbox(image, 640, stride=64, auto=True)[0]
    image = transforms.ToTensor()(image).unsqueeze(0).to(device)
    image = image.half() if device.type != "cpu" else image.float()
    height, width = image.shape[2:]

    # Pure inference: no autograd graph is needed, so do not build one.
    with torch.no_grad():
        output = model(image)
        inf_out, attn = output['test'], output['attn']
        bases = torch.cat([output['bases'], output['sem']], dim=1)
        pooler = ROIPooler(output_size=mask_res, scales=(model.pooler_scale,),
                           sampling_ratio=1, pooler_type='ROIAlignV2',
                           canonical_level=2)
        output, output_mask, _, _, _ = non_max_suppression_mask_conf(
            inf_out, attn, bases, pooler, hyp, conf_thres=conf_thres,
            iou_thres=0.65, merge=False, mask_iou=None)
    pred, pred_masks = output[0], output_mask[0]

    # Rebuild a drawable uint8 image from the network input tensor.
    nimg = (image[0].permute(1, 2, 0) * 255).cpu().numpy().astype(np.uint8)
    pnimg = cv2.cvtColor(nimg, cv2.COLOR_RGB2BGR)

    # NOTE(review): the NMS helper is defined outside this file; presumably it
    # leaves the per-image entry as None when nothing survives - confirm.
    # Either way there is nothing to draw, so return the plain frame.
    if pred is None or pred_masks is None or len(pred) == 0:
        return pnimg

    bboxes = Boxes(pred[:, :4])
    original_pred_masks = pred_masks.view(-1, mask_res, mask_res)
    pred_masks = retry_if_cuda_oom(paste_masks_in_image)(
        original_pred_masks, bboxes, (height, width), threshold=0.5)
    pred_masks_np = pred_masks.detach().cpu().numpy()
    pred_conf = pred[:, 4].detach().cpu().numpy()
    nbboxes = bboxes.tensor.detach().cpu().numpy().astype(np.int32)

    # pred[:, 5] holds the class index and model.names the label list, should
    # text labels ever be drawn next to the boxes.
    for one_mask, bbox, conf in zip(pred_masks_np, nbboxes, pred_conf):
        if conf < conf_thres:
            continue
        color = [np.random.randint(255), np.random.randint(255), np.random.randint(255)]
        # Blend the random colour 50/50 into the pixels covered by the mask.
        pnimg[one_mask] = pnimg[one_mask] * 0.5 + np.array(color, dtype=np.uint8) * 0.5
        # Plain Python ints keep cv2.rectangle happy across OpenCV versions.
        x1, y1, x2, y2 = (int(v) for v in bbox)
        pnimg = cv2.rectangle(pnimg, (x1, y1), (x2, y2), color, 2)
    return pnimg
def main():
    """Let the user pick a webcam and show live segmentation until 'q' is hit."""
    # Probe the first three camera indices so the user sees which ones respond.
    for camera_index in range(3):
        probe = cv2.VideoCapture(camera_index)
        status = "Available" if probe.isOpened() else "Not Available"
        print(f"Camera {camera_index}: {status}")
        probe.release()

    # Ask the user to select a camera to use.
    try:
        camera_index = int(input("Enter the camera index you want to use: "))
    except ValueError:
        print("The camera index must be an integer")
        raise SystemExit(1) from None

    cap = cv2.VideoCapture(camera_index)
    try:
        if not cap.isOpened():
            print("Could not open the selected camera")
            raise SystemExit(1)

        yolov7_model = model_loader('yolov7-mask.pt')
        cv2.namedWindow('Camera', cv2.WINDOW_NORMAL)

        while True:
            ret, frame = cap.read()
            # Check the read succeeded BEFORE handing the frame to the model;
            # a failed read yields no usable frame.
            if not ret:
                print("Could not read the image")
                break

            img_result = inference(frame, yolov7_model)
            # inference() applies COLOR_RGB2BGR to the BGR camera frame, so its
            # result is in RGB order; swap back because cv2.imshow expects BGR.
            cv2.imshow('Camera', cv2.cvtColor(img_result, cv2.COLOR_RGB2BGR))

            # Leave the loop when 'q' is pressed.
            if cv2.waitKey(1) & 0xFF == ord('q'):
                break
    finally:
        # Always free the camera and close the window, even on an error.
        cap.release()
        cv2.destroyAllWindows()


if __name__ == '__main__':
    main()