-
Notifications
You must be signed in to change notification settings - Fork 4
/
hello_app_v2.py
119 lines (105 loc) · 4.31 KB
/
hello_app_v2.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
import warnings
warnings.filterwarnings("ignore")
import os, argparse
import keras
import cv2, spacy, numpy as np
from keras.models import model_from_json
from keras.optimizers import SGD
from sklearn.externals import joblib
from keras import backend as K
from keras.utils.vis_utils import plot_model
# Use the channels-first layout for Keras; the image preprocessing in this
# file transposes OpenCV frames with (2, 0, 1) to match it.
K.set_image_data_format('channels_first')
# JSON file holding the VQA network architecture (read by model_from_json).
VQA_model_file_name = 'VQA/VQA_MODEL.json'
# Weights file loaded into the VQA network after it is built.
VQA_weights_file_name = 'VQA/VQA_MODEL_WEIGHTS.hdf5'
# Label encoder loaded with joblib; its inverse_transform turns a predicted
# label index back into the answer text.
label_encoder_file_name = 'VQA/FULL_labelencoder_trainval.pkl'
def get_image_model():
    """Build the image feature extractor from a pretrained VGG16.

    VGG16 is instantiated with ImageNet weights and re-wrapped so that the
    returned model maps VGG16's input to the output of its second-to-last
    layer.

    Returns:
        The truncated Keras ``Model``, compiled with SGD and categorical
        cross-entropy.
    """
    from keras.applications.vgg16 import VGG16
    from keras.models import Model

    vgg = VGG16(weights='imagenet')
    feature_extractor = Model(vgg.input, vgg.layers[-2].output)
    feature_extractor.compile(
        optimizer=SGD(lr=0.1, decay=1e-6, momentum=0.9, nesterov=True),
        loss='categorical_crossentropy'
    )
    return feature_extractor


# Built once at import time and shared by the feature-extraction code below.
model_vgg = get_image_model()
def get_image_features(image_file_name):
    """Compute the VGG feature vector for an image stored on disk.

    Args:
        image_file_name: Path of the image file to read with OpenCV.

    Returns:
        A ``(1, 4096)`` NumPy array whose single row is the first row of
        ``model_vgg.predict`` for the image.

    Raises:
        IOError: If OpenCV cannot read ``image_file_name``.
    """
    im = cv2.imread(image_file_name)
    if im is None:
        # cv2.imread reports a missing or undecodable file by returning None
        # instead of raising; fail here with the offending path rather than
        # letting cv2.resize raise an opaque error on the None input.
        raise IOError(
            "Could not read image file: {0}".format(image_file_name)
        )

    # Resize to 224x224, then move the channel axis first to match the
    # channels-first data format configured for Keras in this module.
    im = cv2.resize(im, (224, 224)).transpose((2, 0, 1))
    im = np.expand_dims(im, axis=0)  # add the leading batch axis

    image_features = np.zeros((1, 4096))
    image_features[0, :] = model_vgg.predict(im)[0]
    return image_features
_MAX_QUESTION_TOKENS = 30  # rows available per question in the tensor
_WORD_VECTOR_SIZE = 300    # width of each token vector stored in the tensor
_word_embeddings = None    # spaCy pipeline, loaded lazily on first use


def _get_word_embeddings():
    """Return the spaCy vectors pipeline, loading it only on the first call."""
    global _word_embeddings
    if _word_embeddings is None:
        # Loading the model is expensive, so do it once per process instead
        # of once per question.
        _word_embeddings = spacy.load('en_vectors_web_lg')
    return _word_embeddings


def get_question_features(question):
    """Encode a question as a zero-padded tensor of word vectors.

    The question is tokenized with the ``en_vectors_web_lg`` spaCy model and
    each token's vector is written to one row of the result. Questions with
    more than 30 tokens are truncated to the first 30 so they fit the
    fixed-size tensor instead of raising ``IndexError``.

    Args:
        question: The question text.

    Returns:
        A ``(1, 30, 300)`` NumPy array; rows past the last token stay zero.
    """
    tokens = _get_word_embeddings()(question)
    question_tensor = np.zeros((1, _MAX_QUESTION_TOKENS, _WORD_VECTOR_SIZE))
    # zip() stops at the shorter input, which caps the copy at the tensor's
    # row count for over-long questions.
    for j, token in zip(range(_MAX_QUESTION_TOKENS), tokens):
        question_tensor[0, j, :] = token.vector
    return question_tensor
import win32com.client as wincl
# COM handle to the "SAPI.SpVoice" text-to-speech object; hello() calls
# speak.Speak(...) on it to read the answer aloud.
# NOTE(review): win32com comes from pywin32, so this presumably only runs on
# Windows — confirm before deploying elsewhere.
speak = wincl.Dispatch("SAPI.SpVoice")
def get_VQA_model(VQA_model_file_name, VQA_weights_file_name):
    """Load the VQA network from its architecture and weights files.

    Args:
        VQA_model_file_name: Path of the JSON file describing the model
            architecture.
        VQA_weights_file_name: Path of the weights file to load into the
            model.

    Returns:
        The Keras model with weights loaded, compiled with categorical
        cross-entropy loss and the ``rmsprop`` optimizer.
    """
    # Read the architecture inside a context manager so the file handle is
    # closed as soon as the JSON text has been read.
    with open(VQA_model_file_name) as model_file:
        vqa_model = model_from_json(model_file.read())
    vqa_model.load_weights(VQA_weights_file_name)
    vqa_model.compile(loss='categorical_crossentropy', optimizer='rmsprop')
    return vqa_model
# Loaded once at import time; hello() runs its predictions against this model.
model_vqa = get_VQA_model(VQA_model_file_name, VQA_weights_file_name)
# NOTE(review): joblib.load unpickles the file, so the label-encoder file
# must come from a trusted source.
labelencoder = joblib.load(label_encoder_file_name)
import numpy as np
import cv2
from flask import request
from flask import jsonify
from flask import Flask
# Flask application object; the /hello route below is registered on it.
app = Flask(__name__)
def _listen_for_question(default_question):
    """Record one spoken question and return its transcription.

    The microphone at device index 2 is tried first; if it cannot be used,
    the system default microphone is used instead.

    Args:
        default_question: Text returned when the audio cannot be
            transcribed.

    Returns:
        The recognized text followed by ``" ?"``, or ``default_question``
        when recognition fails.
    """
    import speech_recognition as sr

    recognizer = sr.Recognizer()
    try:
        with sr.Microphone(device_index=2) as source:
            print("Ask the question:")
            audio = recognizer.listen(source)
    except (sr.UnknownValueError, AssertionError, IOError, OSError):
        # An unusable device shows up as AssertionError (index out of range)
        # or an I/O error when the stream is opened, not as
        # UnknownValueError, so those are caught too; otherwise this
        # fallback to the default microphone could never run.
        with sr.Microphone() as source:
            print("Ask the question:")
            audio = recognizer.listen(source)

    try:
        # Transcribe once and reuse the text: every recognize_google call is
        # a separate request to the recognition service.
        transcript = recognizer.recognize_google(audio)
    except sr.UnknownValueError:
        print("Could not understand the question")
        return default_question
    except sr.RequestError as e:
        print("Could not request results; {0}".format(e))
        return default_question

    print("You asked " + transcript + " ?")
    return transcript + " ?"


@app.route('/hello', methods=['POST'])
def hello():
    """Answer a spoken question about a frame captured from the webcam.

    The handler listens for a question on the microphone, then shows a
    grayscale webcam preview until the ``c`` key is pressed. The frame
    captured at that moment is run through the VGG feature extractor and,
    together with the encoded question, through the VQA model. The top
    answer is spoken aloud and returned.

    Returns:
        A JSON response of the form ``{'greeting': '<question>\\n<answer>.'}``.

    Raises:
        RuntimeError: If no frame can be read from the camera.
    """
    # The payload is not used, but the body is still parsed so that a request
    # whose body is not valid JSON keeps being rejected by Flask.
    request.get_json(force=True)
    question = _listen_for_question(u'what is there in the picture?')

    cap = cv2.VideoCapture(0)
    try:
        while True:
            ret, frame = cap.read()
            if not ret:
                # Without this check a failed read hands an invalid frame to
                # cvtColor and fails with an opaque OpenCV error.
                raise RuntimeError("Could not read a frame from the camera")

            cv2.imshow('frame', cv2.cvtColor(frame, cv2.COLOR_BGR2GRAY))
            if cv2.waitKey(1) & 0xFF != ord('c'):
                continue  # keep previewing until 'c' is pressed

            # Resize and move the channel axis first to match the
            # channels-first data format, then add the batch axis.
            im = cv2.resize(frame, (224, 224)).transpose((2, 0, 1))
            im = np.expand_dims(im, axis=0)
            image_features = np.zeros((1, 4096))
            image_features[0, :] = model_vgg.predict(im)[0]

            question_features = get_question_features(question)
            y_output = model_vqa.predict([question_features, image_features])

            # Only the highest-scoring label is reported; the last entry of
            # the ascending argsort is that label.
            top_label = np.argsort(y_output)[0, -1]
            # inverse_transform is given a one-element list so it receives a
            # 1-D array of labels rather than a bare scalar.
            ans = ("According to me the answer is "
                   + labelencoder.inverse_transform([top_label])[0])
            speak.Speak(ans)
            response = {'greeting': question + '\n' + ans + '.'}
            break
    finally:
        # Release the camera and close the preview window even when an error
        # interrupts the loop, so the device is free for the next request.
        cap.release()
        cv2.destroyAllWindows()
    return jsonify(response)
if __name__ == '__main__':
    # Start Flask's built-in server in debug mode when run as a script.
    # NOTE(review): debug mode is meant for development only; pass
    # host='<address>' to app.run to serve on a specific interface.
    app.run(debug=True)