-
Notifications
You must be signed in to change notification settings - Fork 0
/
microphone.py
177 lines (149 loc) · 6.23 KB
/
microphone.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
import time
from websockets.exceptions import ConnectionClosedOK
import asyncio
import json
import websockets
import pyaudio
import os
import argparse
import logging
import os
from dotenv import load_dotenv
# Module-wide logger; its level and handlers are attached by configure_logger().
logger = logging.getLogger("usemicrophone")
# Load environment variables from a .env file at a hard-coded absolute path.
# NOTE(review): this path is machine-specific; presumably it supplies the
# DEEPGRAM_API_KEY read in start_stream() -- confirm and consider making it
# configurable.
load_dotenv("/home/notto/workspace/repos/linux-stt/.env")
def configure_logger(loglevel):
    """Set the module logger's level and attach a console handler.

    Parameters
    ----------
    loglevel: string
        One of "DEBUG", "INFO", "WARNING" or "ERROR". Matching is
        case-insensitive, so "debug" is accepted as well.

    Raises
    ------
    KeyError
        If ``loglevel`` is not one of the supported level names.
    """
    levels = dict(
        DEBUG=logging.DEBUG,
        INFO=logging.INFO,
        WARNING=logging.WARNING,
        ERROR=logging.ERROR,
    )
    # Normalise the case so command-line values such as "debug" work.
    logger.setLevel(levels[loglevel.upper()])

    if logger.handlers:
        # Already configured: adding another handler would emit every
        # record more than once.
        return

    formatter = logging.Formatter("%(levelname)-8s %(asctime)s %(name)-12s %(message)s")
    streamhandle = logging.StreamHandler()
    streamhandle.setFormatter(formatter)
    logger.addHandler(streamhandle)
    # Re-enable the FileHandler if you want to see the logs
    # filehandle = logging.FileHandler(
    #     "/full/path/to/linux-stt/logs.log"
    # )
    # filehandle.setFormatter(formatter)
    # logger.addHandler(filehandle)
### Update these to match the audio format from your microphone
# FORMAT, CHANNELS and RATE are passed to audio.open(); CHANNELS and RATE are
# also appended to the request URI as query parameters in start_stream().
FORMAT = pyaudio.paInt16
CHANNELS = 2
RATE = 44100
# Every non-empty audio piece read from the microphone is appended here by
# the sender before it is sent over the websocket.
frames = []
# Every decoded server message that contains a "channel" entry is appended
# here by the receiver; the script block reads it after streaming ends.
recv_messages = []
def _handle_task_result(task):
    """Done-callback that logs any exception a finished task raised.

    Parameters
    ----------
    task: asyncio.Task object
        The completed task whose outcome is inspected.
    """
    try:
        task.result()
    except asyncio.CancelledError:
        pass  # Task cancellation should not be logged as an error.
    except Exception:  # pylint: disable=broad-except
        # logger.exception logs at ERROR level and includes the traceback,
        # which the task repr alone does not show.
        logger.exception("Exception raised by task = %r", task)
async def start_stream(mic_stream, uri):
    """Run websocket connection to stream microphone audio to uri.

    Audio is read from ``mic_stream`` and sent over the websocket while the
    server's JSON responses are received concurrently. Final transcripts are
    printed as they arrive; a transcript containing "exit" ends the stream.

    Parameters
    ----------
    mic_stream: pyaudio.Stream object
    uri: string
        The full destination with request parameters baked in. The encoding,
        sample rate and channel count are appended as query parameters.
    """
    # The API token is taken from the DEEPGRAM_API_KEY environment variable.
    extra_headers = {"Authorization": f"Token {os.getenv('DEEPGRAM_API_KEY')}"}
    separator = "&" if "?" in uri else "?"
    uri = f"{uri}{separator}encoding=linear16&sample_rate={RATE}&channels={CHANNELS}"
    logger.debug(uri)

    try:
        async with websockets.connect(uri, extra_headers=extra_headers) as ws:
            # see https://websockets.readthedocs.io/en/stable/reference/client.html#websockets.client.WebSocketClientProtocol
            # Shared between sender and receiver so the receiver can ask the
            # sender to end the stream.
            shared_data = {"endstream": False}
            requestid = ws.response_headers.get("dg-request-id", ws.response_headers)
            logger.debug(f"Request: {requestid}")

            async def sender(mic_stream, ws, shared):
                """Send audio through websocket."""
                while True:
                    piece = mic_stream.read(mic_stream.get_read_available())
                    if shared["endstream"]:
                        logger.debug("Sending close frame")
                        await ws.send(b"")  # This will close the connection
                        break
                    if piece:
                        try:
                            frames.append(piece)
                            await ws.send(piece)
                        except ConnectionClosedOK:
                            break
                    # Always yield to the event loop, even when no audio was
                    # available; otherwise this loop spins without awaiting
                    # and the receiver never gets to run.
                    await asyncio.sleep(0.01)

            # This example function will handle responses.
            async def receiver(ws, shared):
                """Decode server messages, print final transcripts, detect "exit"."""
                async for raw in ws:
                    # Deserialize the JSON message.
                    msg = json.loads(raw)
                    if msg.get("type", "") == "UtteranceEnd":
                        logger.info(msg)
                        continue
                    try:
                        transcript = (
                            msg.get("channel").get("alternatives")[0].get("transcript")
                        )
                        final = msg.get("is_final")
                        speech_final = msg.get("speech_final")
                        if final:
                            logger.debug(
                                f"{transcript}, is_final {final}, speech_final {speech_final}"
                            )
                            if len(transcript.strip()) > 0:
                                if "exit" in transcript.lower():
                                    # Drop everything from the last "exit"
                                    # onwards and ask the sender to stop.
                                    last_pos = transcript.lower().rfind("exit")
                                    transcript = transcript[:last_pos]
                                    shared["endstream"] = True
                                print(f"{transcript}")
                    except Exception as e:
                        # The above get will fail on final metadata response
                        logger.error(f"Transcript processing error {e}")
                    if msg.get("channel", False):
                        recv_messages.append(msg)

            # We are inside a running coroutine, so schedule directly on the
            # running loop.
            sendertask = asyncio.create_task(sender(mic_stream, ws, shared_data))
            receivertask = asyncio.create_task(receiver(ws, shared_data))
            sendertask.add_done_callback(_handle_task_result)
            receivertask.add_done_callback(_handle_task_result)
            await asyncio.wait([sendertask, receivertask], timeout=None)
    except Exception as e:
        logger.error(f"Exception: {e}")
        logger.error(f"Dir: {dir(e)}")
        # Only some exceptions carry response headers; reading the attribute
        # unconditionally would raise AttributeError and hide the real error.
        if hasattr(e, "headers"):
            logger.error(f"Headers: {e.headers}")
def stream_microphone(mic_stream, uri):
    """Blocking entry point: run start_stream() to completion.

    Parameters
    ----------
    mic_stream: pyaudio.Stream object
    uri: string
        Destination passed through unchanged to start_stream().
    """
    streaming_session = start_stream(mic_stream, uri)
    asyncio.run(streaming_session)
if __name__ == "__main__":
    # Command line: a positional websocket URL and an optional log level.
    parser = argparse.ArgumentParser("microphone")
    parser.add_argument("url", help="The URL to hit", type=str)
    parser.add_argument(
        "--loglevel", help="The logging level", type=str, default="INFO"
    )
    args = parser.parse_args()

    # Configure logging before touching the audio device so that an invalid
    # --loglevel cannot leave an opened stream behind.
    configure_logger(args.loglevel)

    audio = pyaudio.PyAudio()
    try:
        stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True)
        try:
            stream_microphone(stream, args.url)
        except Exception as e:
            logger.error(f"Found exception {e}")
        finally:
            stream.close()
    finally:
        # Release the PortAudio resources held by this PyAudio instance.
        audio.terminate()

    # Stitch the final transcripts of all received messages together.
    transcript = ""
    for msg in recv_messages:
        if "channel" in msg and msg.get("is_final", False):
            transcript = (
                transcript.strip()
                + " "
                + msg["channel"]["alternatives"][0]["transcript"]
            )
    # print(transcript) # There are two print-statements in this file. Swap them out to print either streaming, or at the end.