-
Notifications
You must be signed in to change notification settings - Fork 0
/
speech_to_text_whisper.py
95 lines (75 loc) · 2.54 KB
/
speech_to_text_whisper.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
# Description: Record audio from the macOS microphone and transcribe it using OpenAI's Whisper ASR API
# Note: start/stop control is crude (it polls the 'a' key via the keyboard package), but it works
# tested only on macOS 10.15.7 with Python 3.9
# Bryan Randell 2023
import pyaudio
import wave
import tempfile
import openai
import os
import keyboard
import time
from dotenv import load_dotenv
# Function to record audio from the macOS microphone
def record_audio(filename):
    """Record microphone audio to a WAV file, started and stopped with the 'a' key.

    Opens a mono, 16-bit, 16 kHz PyAudio input stream, waits up to 100 seconds
    for 'a' to be pressed, then records until 'a' is pressed again or 120
    seconds have elapsed, and finally writes the captured frames to
    ``filename``.

    Args:
        filename: Path of the WAV file to write.

    Returns:
        None. If 'a' is not pressed within the start timeout, the function
        returns early and nothing is written to ``filename``.
    """
    CHUNK = 1024
    FORMAT = pyaudio.paInt16
    CHANNELS = 1
    RATE = 16000
    START_TIMEOUT = 100   # seconds to wait for the first key press
    MAX_DURATION = 120    # maximum length of a recording, in seconds

    p = pyaudio.PyAudio()
    try:
        # Query the sample width while the PyAudio instance is still alive.
        sample_width = p.get_sample_size(FORMAT)
        stream = p.open(format=FORMAT,
                        channels=CHANNELS,
                        rate=RATE,
                        input=True,
                        frames_per_buffer=CHUNK)
        try:
            print("Press 'a' to start and stop recording.")
            frames = []

            wait_started = time.monotonic()
            while True:
                if keyboard.is_pressed('a'):
                    # Pause briefly so the same key press is not immediately
                    # seen as the "stop" press by the recording loop below.
                    time.sleep(0.2)
                    break
                if time.monotonic() - wait_started > START_TIMEOUT:
                    print("Recording timed out.")
                    return
                # Poll at ~100 Hz instead of spinning a CPU core at 100%.
                time.sleep(0.01)

            print("Recording...")
            record_started = time.monotonic()
            while not keyboard.is_pressed('a'):
                if time.monotonic() - record_started > MAX_DURATION:
                    print("Recording is too long.")
                    break
                # read() blocks until CHUNK frames are available, so the loop
                # needs no extra sleep to avoid busy-waiting.
                frames.append(stream.read(CHUNK, exception_on_overflow=False))
            print("Recording finished.")
        finally:
            # Release the audio device on every exit path, including the
            # start-timeout return and any exception raised while reading.
            stream.stop_stream()
            stream.close()
    finally:
        p.terminate()

    with wave.open(filename, 'wb') as wf:
        wf.setnchannels(CHANNELS)
        wf.setsampwidth(sample_width)
        wf.setframerate(RATE)
        wf.writeframes(b''.join(frames))
# Function to transcribe audio using OpenAI's Whisper ASR API
def transcribe_audio(filename: str) -> str:
    """Transcribe the audio file at ``filename`` with the "whisper-1" model.

    The file is opened in binary mode and handed to
    ``openai.Audio.transcribe``; the ``"text"`` entry of the response is
    returned.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The value stored under ``"text"`` in the transcription response.
    """
    model_name = "whisper-1"
    with open(filename, 'rb') as audio_stream:
        response = openai.Audio.transcribe(model_name, audio_stream)
        return response["text"]
# Main function to record audio and transcribe it
def main_audio_to_text() -> str:
    """Record audio from the microphone and return its transcription.

    Calls ``load_dotenv()``, sets ``openai.api_key`` from the
    ``OPENAI_API_KEY`` environment variable, records into a temporary WAV
    file via ``record_audio`` and passes that file to ``transcribe_audio``.

    Returns:
        The transcribed text, or an empty string when nothing was recorded
        or the transcription came back empty.
    """
    load_dotenv()
    openai.api_key = os.getenv("OPENAI_API_KEY")

    # The temporary file is removed automatically when the ``with`` block
    # exits, so both recording and transcription must happen inside it.
    # NOTE: the file is reopened by name while still open here, which the
    # tempfile documentation says is not possible on Windows.
    with tempfile.NamedTemporaryFile(suffix='.wav', delete=True) as temp_file:
        # Recording runs until the user presses 'a' again (or the limits
        # inside record_audio are reached), not for a fixed duration.
        record_audio(temp_file.name)

        # record_audio returns without writing anything when the start
        # timeout expires; do not upload a zero-byte file in that case.
        if os.path.getsize(temp_file.name) == 0:
            print("No audio was recorded.")
            return ""

        # Use OpenAI's Whisper API to transcribe the audio
        transcription = transcribe_audio(temp_file.name)

    if transcription:
        print("Transcription:", transcription)
        return transcription

    print("Error transcribing audio.")
    return ""
# Script entry point: run one record-and-transcribe cycle when executed directly.
if __name__ == "__main__":
    main_audio_to_text()