-
Notifications
You must be signed in to change notification settings - Fork 0
/
main.py
87 lines (75 loc) · 2.96 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
import numpy as np
import argparse
import imutils
import time
import cv2
from urllib.request import urlopen
from content.speech_message import SpeechMessage
# Shared speech helper; main() hands recognised labels to it.
speech = SpeechMessage()

# Endpoint polled for a single JPEG per frame when the source is not "webcam".
host = 'http://192.168.0.101:8080/'
url = host + 'shot.jpg'

# construct the argument parser and parse the arguments
ap = argparse.ArgumentParser()
ap.add_argument("--prototxt", required=True,
                help="path to Caffe 'deploy' prototxt file")
ap.add_argument("--model", required=True,
                help="path to Caffe pre-trained model")
ap.add_argument("--source", required=True,
                help="Source of video stream (webcam/host)")
ap.add_argument("-c", "--confidence", type=float, default=0.2,
                help="minimum probability to filter weak detections")
ap.add_argument("-l", "--labels", required=True,
                help="path to ImageNet labels (i.e., syn-sets)")
args = vars(ap.parse_args())

# Read the label file inside a context manager so the handle is closed as
# soon as its contents are in memory.
with open(args["labels"]) as labels_file:
    rows = labels_file.read().strip().split("\n")

# For each row, drop everything up to the first space and keep only the text
# before the first comma as the class name.
CLASSES = [r[r.find(" ") + 1:].split(",")[0] for r in rows]
# One random 3-component colour per class.
COLORS = np.random.uniform(0, 255, size=(len(CLASSES), 3))

# load our serialized model from disk
print("[INFO] loading model...")
net = cv2.dnn.readNetFromCaffe(args["prototxt"], args["model"])

# initialize the video stream and allow the camera sensor to warm up
print("[INFO] starting video stream...")
# Stays None unless the webcam source is selected, so the name is always bound.
vs = None
if args["source"] == "webcam":
    vs = cv2.VideoCapture(0)
    vs.set(cv2.CAP_PROP_FRAME_WIDTH, 1280)
    # vs.set(4, 1024)
    # vs.set(15, 0.1)
    # Only a local camera has a sensor that needs time to settle.
    time.sleep(2.0)

detected_objects = []
# loop over the frames from the video stream
# Minimum top-1 confidence, in percent, before the label is spoken aloud.
_SPEECH_MIN_PERCENT = 60.00


def _next_frame():
    """Fetch the next frame from the source selected by ``args["source"]``.

    Returns:
        The decoded image, or ``None`` when the webcam read fails, the HTTP
        response body is empty, or the downloaded bytes cannot be decoded.
    """
    if args["source"] == "webcam":
        grabbed, frame = vs.read()
        return frame if grabbed else None

    # Close the HTTP response right away instead of leaking one per frame.
    with urlopen(url) as response:
        payload = response.read()
    if not payload:
        return None
    encoded = np.array(bytearray(payload), dtype=np.uint8)
    return cv2.imdecode(encoded, -1)


def _label_frame(frame):
    """Classify *frame* with ``net`` and draw the top prediction onto it.

    Nothing is drawn or spoken unless the best score exceeds
    ``args["confidence"]``. The label is additionally passed to
    ``speech.thread_run`` when the score is above ``_SPEECH_MIN_PERCENT``.
    """
    # The network is fed a 224x224 copy of the frame with scale factor 1 and
    # the mean (104, 117, 123) passed to blobFromImage.
    blob = cv2.dnn.blobFromImage(cv2.resize(frame, (224, 224)), 1, (224, 224), (104, 117, 123))
    net.setInput(blob)
    detections = net.forward()

    # Only the highest-scoring class is ever announced or drawn, so take the
    # last entry of the ascending sort instead of walking a top-5 list.
    idx = np.argsort(detections[0])[-1]
    confidence = detections[0, idx]
    if not confidence > args["confidence"]:
        return

    if confidence * 100 > _SPEECH_MIN_PERCENT:
        speech.thread_run(CLASSES[idx])
    text = "Label: {}, {:.2f}%".format(CLASSES[idx], confidence * 100)
    cv2.putText(frame, text, (5, 25), cv2.FONT_HERSHEY_SIMPLEX, 0.7, (0, 0, 255), 2)


def main():
    """Run the capture, classify and display loop until "q" is pressed.

    Reads the module-level ``args``, ``vs``, ``url``, ``net``, ``speech`` and
    ``CLASSES``. The loop also stops when no frame can be obtained. Windows
    are destroyed and the webcam (if used) is released on every exit path,
    including when an exception propagates.
    """
    try:
        while True:
            frame = _next_frame()
            if frame is None:
                # A failed read would otherwise crash inside the resize below.
                print("[INFO] no frame received, stopping...")
                break

            frame = imutils.resize(frame, width=1200)
            _label_frame(frame)

            cv2.imshow("Frame", frame)
            key = cv2.waitKey(1) & 0xFF
            if key == ord("q"):
                break
    finally:
        # do a bit of cleanup
        cv2.destroyAllWindows()
        if args["source"] == "webcam":
            vs.release()
if __name__ == '__main__':
    # init_speech() is called once, before main(), because the loop inside
    # main() hands labels to speech.thread_run().
    speech.init_speech()
    # Runs on the main thread and returns once the frame loop exits.
    main()
    # NOTE(review): disabled variant that would run main() on a daemon thread.
    # `threading` is not among the imports at the top of this file, so these
    # lines cannot be re-enabled as-is.
    # thread_all_data = threading.Thread(target=main)
    # thread_all_data.daemon = True
    # thread_all_data.start()