-
Notifications
You must be signed in to change notification settings - Fork 5
/
deepgram_streaming.py
105 lines (93 loc) · 4 KB
/
deepgram_streaming.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
""" A simple example which prints out parsed streaming responses.
Python version: 3.6+
Dependencies (use `pip install X` to install a dependency):
- websockets
Usage:
python deepgram_streaming.py -k 'YOUR_DEEPGRAM_API_KEY' /path/to/audio.wav
Limitations:
- Only parses signed, 16-bit little-endian encoded WAV files.
"""
import argparse
import asyncio
import base64
import json
import sys
import wave
import websockets
import subprocess
# Mimic sending a real-time stream by sending this many seconds of audio at a time.
# `run` uses this value both to size each audio chunk and as the pause
# (in seconds) between consecutive sends.
REALTIME_RESOLUTION = 0.100
async def run(data, key, channels, sample_width, sample_rate, filepath):
    """ Streams raw audio to the real-time endpoint and prints finalized transcripts.

    The audio is sent in slices of `REALTIME_RESOLUTION` seconds, pausing
    that long between slices, while a second task prints every finalized
    result the server sends back.

    Args:
        data: Raw audio bytes to stream.
        key: API key, sent in the `Authorization` header.
        channels: Number of audio channels.
        sample_width: Size of one sample, in bytes.
        sample_rate: Samples per second.
        filepath: Path of the input file. Accepted for interface
            compatibility; not used by this function.

    Raises:
        Exception: Any error raised while sending or receiving is propagated
            to the caller instead of being silently dropped.
    """
    # How many bytes are contained in one second of audio.
    byte_rate = sample_width * sample_rate * channels
    # How many bytes are in `REALTIME_RESOLUTION` seconds of audio? Never less
    # than one byte, so the send loop always makes progress.
    chunk_size = max(1, int(byte_rate * REALTIME_RESOLUTION))
    print('This demonstration will print all finalized results, not interim results.')

    # Connect to the real-time streaming endpoint, attaching our credentials.
    async with websockets.connect(
        # Alter the protocol and base URL below.
        f'wss://api.deepgram.com/v1/listen?punctuate=true&channels={channels}&sample_rate={sample_rate}&encoding=linear16',
        extra_headers={
            'Authorization': 'Token {}'.format(key)
        }
    ) as ws:
        async def sender(ws):
            """ Sends the data, mimicking a real-time connection.
            """
            try:
                # Slice by offset rather than repeatedly re-slicing the
                # remainder, which would copy the whole buffer every time.
                for offset in range(0, len(data), chunk_size):
                    # Send the data
                    await ws.send(data[offset:offset + chunk_size])
                    # Mimic real-time by waiting `REALTIME_RESOLUTION` seconds
                    # before the next packet.
                    await asyncio.sleep(REALTIME_RESOLUTION)

                # An empty binary message tells Deepgram that no more audio
                # will be sent. Deepgram will close the connection once all
                # audio has finished processing.
                await ws.send(b'')
            except Exception as e:
                print(f'Error while sending: {e}')
                raise

        async def receiver(ws):
            """ Print out the messages received from the server.
            """
            async for msg in ws:
                res = json.loads(msg)
                try:
                    # To see interim results in this demo, remove the conditional `if res['is_final']:`.
                    if res['is_final']:
                        print(res['channel']['alternatives'][0]['transcript'])
                except (KeyError, IndexError):
                    # Not a transcript-shaped message (or no alternatives):
                    # show the raw payload instead.
                    print(msg)

        # `gather` re-raises the first failure from either task; `asyncio.wait`
        # would leave it unretrieved and keep waiting on the other task.
        await asyncio.gather(sender(ws), receiver(ws))
        print()
def parse_args():
    """ Builds the command-line parser and returns the parsed arguments.

    The returned namespace has two attributes: `key` (the required
    `-k/--key` option) and `input` (the positional input file path).
    """
    cli = argparse.ArgumentParser(
        description='Submits data to the real-time streaming endpoint.',
    )
    # Required option carrying the API credential.
    cli.add_argument(
        '-k',
        '--key',
        required=True,
        help='YOUR_DEEPGRAM_API_KEY (authorization)',
    )
    # Positional path of the audio file to stream.
    cli.add_argument('input', help='Input file.')
    namespace = cli.parse_args()
    return namespace
def main():
    """ Entrypoint for the example.

    Parses the command line, reads every frame of the given WAV file,
    prints its parameters to stderr, and streams the audio through `run`.

    Raises:
        ValueError: If the WAV file's sample width is not 2 bytes (16-bit).
    """
    # Parse the command-line arguments.
    args = parse_args()

    # Open the audio file.
    with wave.open(args.input, 'rb') as fh:
        (channels, sample_width, sample_rate, num_samples, _, _) = fh.getparams()
        # Raise explicitly instead of using `assert`, which is stripped when
        # Python runs with `-O` and would let unsupported audio through.
        if sample_width != 2:
            raise ValueError('WAV data must be 16-bit.')
        data = fh.readframes(num_samples)
    print(f'Channels = {channels}, Sample Rate = {sample_rate} Hz, Sample width = {sample_width} bytes, Size = {len(data)} bytes', file=sys.stderr)

    # Run the example on an explicitly created event loop. Relying on
    # `asyncio.get_event_loop()` to create one implicitly is deprecated in
    # newer Python versions; this form also still works on Python 3.6.
    loop = asyncio.new_event_loop()
    try:
        loop.run_until_complete(run(data, args.key, channels, sample_width, sample_rate, args.input))
    finally:
        loop.close()
if __name__ == '__main__':
    # Use whatever `main` returns as the exit status, falling back to 0
    # when it returns a falsy value.
    exit_status = main() or 0
    sys.exit(exit_status)