-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathBluemixSpeech.py
123 lines (97 loc) · 4.39 KB
/
BluemixSpeech.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
#==================================================================================
# IBM Bluemix Speech Recognition for the RocConf Project
# - Jeffery A. White - September 2016
#
# Based upon the SpeechRecognition Library
# SpeechRecognition - Zhang, A. (2015).
# Speech Recognition (Version 3.1) [Software].
# Available from https://github.com/Uberi/speech_recognition#readme.
#
# Modified here to take .flac as input since we couldn't get the
# flac encoder in that library to work. Uses FFMPEG converted .flac files.
#
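# Usage - BluemixSpeech.py {Filename}
#   {Filename} is split on '_'; its second and third fields are stored as the
#   session key and user id alongside the results.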
#==================================================================================
import pymongo
from pymongo import MongoClient
import pprint
import sys
import os, base64
import json
from urllib import urlencode
from urllib2 import Request, urlopen, URLError, HTTPError
class RequestError(Exception):
    """Signals that the speech recognition request could not be completed."""
# Identifiers stored with every record; the __main__ section overwrites them
# with values taken from the input file name.
session_key = user_id = ""

# Shared pretty-printer used to dump raw recognition results to stdout.
pp = pprint.PrettyPrinter(indent=2)

# MongoDB handles: a client created with the driver defaults, the 'rocconf'
# database, and the two collections this script writes to.
client = MongoClient()
database = client['rocconf']
raw_data_collection = database['speechrawdata_bluemix']
transcript_collection = database['transcript_bluemix']
#==================================================================================
# Access keys for using IBM Bluemix. Generate these through a Bluemix Account
# http://www.ibm.com/cloud-computing/bluemix/
#==================================================================================
# Credentials are read from the environment when available so they do not
# have to live in the source file. The literals below are kept only as a
# fallback so existing invocations keep working.
# NOTE(review): values committed to source control should be treated as
# exposed - rotate them and supply the new ones via the environment.
IBM_TONE_USERNAME = os.environ.get("IBM_TONE_USERNAME", "2572b9cb-a85b-4a2d-afbf-c16813e8c18a")
IBM_TONE_PASSWORD = os.environ.get("IBM_TONE_PASSWORD", "b33xVMxeZsjq")
#==================================================================================
# Send the .flac data to BlueMix, dump the results to the DB
# and return the results to the caller
#==================================================================================
def bluemix_call(filename):
    """Send a .flac file to the Bluemix speech-to-text service and return the result.

    The parsed response is also written to ``raw_data_collection`` together
    with the module-level ``session_key`` and ``user_id``.

    Args:
        filename: Path of the .flac file; a relative path is resolved against
            the directory containing this script.

    Returns:
        The JSON response of the recognize endpoint, parsed with ``json.loads``.

    Raises:
        RequestError: If the service answers with an HTTP error or cannot be
            reached.

    Errors raised while opening or reading the file propagate unchanged.
    """
    language = "en-US"
    basepath = os.path.dirname(__file__)
    filepath = os.path.abspath(os.path.join(basepath, filename))

    # FLAC is binary data: read it in binary mode so no newline translation or
    # text decoding is applied, and close the handle as soon as we are done.
    with open(filepath, 'rb') as flac_file:
        flac_data = flac_file.read()

    model = "{0}_BroadbandModel".format(language)
    url = "https://stream.watsonplatform.net/speech-to-text/api/v1/recognize?{0}".format(urlencode({"profanity_filter": "false","timestamps": "true","continuous": "true","model": model}))
    request = Request(url, data=flac_data, headers={"Content-Type": "audio/x-flac"})

    # HTTP Basic auth: base64("user:password"). Encoding to bytes first and
    # decoding afterwards yields a text header value on Python 2 and 3 alike.
    credentials = "{0}:{1}".format(IBM_TONE_USERNAME, IBM_TONE_PASSWORD)
    authorization_value = base64.standard_b64encode(credentials.encode("utf-8")).decode("utf-8")
    request.add_header("Authorization", "Basic {0}".format(authorization_value))

    try:
        response = urlopen(request)
    except HTTPError as e:
        # use getattr to be compatible with Python 2.6
        raise RequestError("speech recognition request failed: {0}".format(getattr(e, "reason", "status {0}".format(e.code))))
    except URLError as e:
        raise RequestError("speech recognition connection failed: {0}".format(e.reason))

    # Always release the connection, even if reading or decoding fails.
    try:
        response_text = response.read().decode("utf-8")
    finally:
        response.close()
    result = json.loads(response_text)

    # Dump to MongoDB for later analysis!
    final_dict = {}
    final_dict['session_key'] = session_key
    final_dict['user'] = user_id
    final_dict['data'] = result
    raw_data_collection.insert_one(final_dict)
    return result
#================================================================================================
# Processing a text transcript from bluemix results data
#================================================================================================
class UnknownValueError(Exception):
    """Raised when a recognition result entry carries no alternatives."""


def process_transcript(raw_data):
    """Build a text transcript from Bluemix result data and store it in MongoDB.

    The raw data is pretty-printed to stdout first. Every ``transcript`` found
    in the alternatives of each entry of ``raw_data["results"]`` is collected,
    the pieces are joined with ``". "``, and the resulting string is inserted
    into ``transcript_collection`` together with the module-level
    ``session_key`` and ``user_id``.

    Args:
        raw_data: Parsed recognition response, as returned by ``bluemix_call``.

    Raises:
        UnknownValueError: If an entry of ``raw_data["results"]`` has no
            ``"alternatives"`` key.
    """
    pp.pprint(raw_data)

    transcription = []
    for utterance in raw_data["results"]:
        if "alternatives" not in utterance:
            raise UnknownValueError()
        for hypothesis in utterance["alternatives"]:
            # Alternatives without a transcript are skipped.
            if "transcript" in hypothesis:
                transcription.append(hypothesis["transcript"])
    transcript = ". ".join(transcription)

    # Dump transcript into MongoDB for display and later analysis!
    final_dict = {}
    final_dict['session_key'] = session_key
    final_dict['user'] = user_id
    final_dict['transcript'] = transcript
    transcript_collection.insert_one(final_dict)
#=======================================================
# Main Caller
#=======================================================
if __name__ == "__main__":
    # Fail with a readable message instead of an IndexError traceback when the
    # argument is missing.
    if len(sys.argv) < 2:
        sys.exit("Usage - BluemixSpeech.py {Filename}")

    # The file name is split on '_'; fields 1 and 2 become the session key and
    # user id that are stored with every MongoDB record.
    final = sys.argv[1].split('_')
    if len(final) < 3:
        sys.exit("Filename must contain at least three '_'-separated fields: {0}".format(sys.argv[1]))
    session_key = final[1]
    user_id = final[2]

    raw_data = bluemix_call(sys.argv[1])
    process_transcript(raw_data)