app.py
import streamlit as st  # for the web app
import wave  # to write the audio as a WAV file
import os  # environment variables and file checks
import requests  # intended for API-call timeout handling (currently unused)
import pyaudio  # microphone recording
import whisper  # for speech-to-text
import openai  # for GPT-3
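# A rough setup sketch (package names inferred from the imports above):
#   pip install streamlit PyAudio openai-whisper openai requests
# then run the app with:
#   streamlit run app.py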
st.title("Audio Summarizer")
st.write("""
1. Set the audio recording parameters in the left sidebar (or keep the defaults).
2. Click 'Record Audio' and speak into your microphone.
3. Enter a prompt in the text area on the left sidebar.
4. Click 'Transcribe Audio and Process Prompt' to convert speech to text and process it according to the prompt.

The result will be displayed below.
""")
# PARAMETERS FOR RECORDING FROM MICROPHONE
DURATION = int(st.sidebar.number_input(label="Duration (s)", value=20)) # seconds of recording
CHANNELS = int(st.sidebar.number_input(label="Channels (1=mono, 2=stereo, etc.)", value=1)) # mono/stereo
FORMAT = pyaudio.paInt16
RATE = int(st.sidebar.number_input(label="Sampling Rate (Hz)", value=44100)) # Hz
CHUNK = int(st.sidebar.number_input(label="Chunk Size", value=1024))
TEMP_AUDIO_FILE_PATH = "./output.wav"
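# Each stream.read(CHUNK) below returns CHUNK frames, so RATE / CHUNK reads cover one
# second of audio and RATE / CHUNK * DURATION reads cover the full recording.
# At the defaults (16-bit mono at 44100 Hz) that is about 88 KB of raw audio per second.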
# RECORD FROM MICROPHONE
if st.sidebar.button("Record Audio"):
    audio = pyaudio.PyAudio()
    stream = audio.open(format=FORMAT, channels=CHANNELS, rate=RATE, input=True, frames_per_buffer=CHUNK)
    st.write("Recording in progress...")
    frames = []
    for i in range(0, int(RATE / CHUNK * DURATION)):
        data = stream.read(CHUNK)
        frames.append(data)
    stream.stop_stream()
    stream.close()
    audio.terminate()
    with wave.open(TEMP_AUDIO_FILE_PATH, 'wb') as waveFile:
        # nframes is set to 0 here; writeframes patches the header with the real count
        waveFile.setparams((CHANNELS, audio.get_sample_size(FORMAT), RATE, 0, 'NONE', 'not compressed'))
        waveFile.writeframes(b''.join(frames))
        # no explicit close needed: the with block closes the file
    st.write(f"Recording complete and saved as {TEMP_AUDIO_FILE_PATH}")
prompt = st.sidebar.text_area("Prompt", value="Summarize this text")
# SPEECH TO TEXT
if st.sidebar.button("Transcribe Audio and Process Prompt"):
    if not os.path.exists(TEMP_AUDIO_FILE_PATH):
        st.error("No recording found - click 'Record Audio' first.")
        st.stop()
    st.write("Converting speech to text...")
    # convert speech to text using Whisper
    model = whisper.load_model("base")  # load the base Whisper model (downloads on first use)
    result = model.transcribe(TEMP_AUDIO_FILE_PATH)
    text = result["text"]
    st.subheader("Transcription:")
    st.write(text)
    # TEXT PROCESSING
    prompt = prompt + ": \n" + text
    print("\ntext sent to GPT = " + text)
    st.write("Processing text...")
    # process text using GPT-3 (legacy completions API)
    openai.api_key = os.getenv("OPENAI_API_KEY")
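    # On the parameters below: temperature=0.5 trades determinism for some variety,
    # max_tokens=1024 caps the completion length, n=1 requests a single completion,
    # and stop=None lets the model decide where to end.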
    response = openai.Completion.create(
        engine="text-davinci-003",
        prompt=prompt,
        temperature=0.5,
        max_tokens=1024,
        n=1,
        # top_p=1,
        # frequency_penalty=0,
        # presence_penalty=0.6,
        # stop=["\n", " Human:", " AI:"]
        stop=None
    )
print("openAI.completion created")
result = response.choices[0].text
st.subheader("GPT response:")
st.write(result)
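# NOTE: text-davinci-003 and the openai.Completion endpoint belong to the legacy
# (pre-1.0 openai-python) completions interface; on current library versions this
# call would need to be ported to the chat completions API.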