-
Notifications
You must be signed in to change notification settings - Fork 2
/
app.py
196 lines (163 loc) · 6.86 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
import os
import time
from datetime import datetime
import configparser
from werkzeug.utils import secure_filename
from flask import Flask, request, render_template, jsonify, url_for, session
import openai
from gtts import gTTS
import secrets
import csv
import uuid
# Load the OpenAI API key from the [OPENAI_API] section of config.ini.
# ConfigParser.read() silently skips a file it cannot find; in that case the
# config.get() call below is what fails.
config = configparser.ConfigParser()
config.read('config.ini')
openai.api_key = config.get('OPENAI_API', 'key')
# Development mode: when True, the routes skip every request to OpenAI and
# return canned "This is DEV mode." responses instead.
DEV_MODE = False
# Used when testing the app with the Flask server locally: when True, running
# this file directly starts the server over HTTPS, which needs a certificate
# (cert.pem / key.pem).
DEV_MODE_APP = True
# Initializing Flask app
app = Flask(__name__)
# Paths for uploaded recordings and generated speech. Both currently point at
# the same directory under static/.
app.config['UPLOAD_FOLDER'] = "static/audio/"
app.config['AUDIO_FOLDER'] = "static/audio/"
# Generate the secret key used to sign the session cookie. A fresh random key
# is created every time this module is loaded.
# NOTE(review): sessions signed with an earlier key presumably stop validating
# after a restart, and separate worker processes would not share a key —
# confirm this is intended.
app.config['SECRET_KEY'] = secrets.token_hex(16)
@app.route('/')
def index():
    """Serve the initial page by rendering the ``index.html`` template."""
    landing_page = render_template('index.html')
    return landing_page
@app.route('/transcribe', methods=['POST'])
def transcribe_audio():
    """Store an uploaded recording, transcribe it and log the transcript.

    Expects a multipart form POST carrying an ``audio`` file part and an
    ``input_language`` form field (a language code, or ``"auto"``).

    Returns:
        JSON ``{"transcript": ...}`` on success, or JSON ``{"error": ...}``
        with status 400 when the audio part is missing or empty, or when the
        ``input_language`` field is absent.
    """
    if 'audio' not in request.files:
        return jsonify({'error': 'No audio file provided'}), 400

    # Validate the form before touching the disk so that a rejected request
    # does not leave an orphaned recording behind.
    input_language = request.form.get('input_language')
    if not input_language:
        return jsonify({'error': 'No input language provided'}), 400

    # The client-supplied filename is never used: the stored name is built
    # from a timestamp plus a UUID, so it cannot collide or escape the folder.
    audio_file = request.files['audio']
    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    filename = f"recording_{timestamp}_{uuid.uuid4()}.webm"
    os.makedirs(app.config['UPLOAD_FOLDER'], exist_ok=True)
    file_path = os.path.join(app.config['UPLOAD_FOLDER'], filename)
    audio_file.save(file_path)

    # save() has written the whole upload by the time it returns. A zero-byte
    # upload can never become non-empty, so reject it here instead of polling
    # the file for content.
    if os.path.getsize(file_path) == 0:
        os.remove(file_path)
        return jsonify({'error': 'Uploaded audio file is empty'}), 400

    # Transcribe with OpenAI's whisper model unless running in DEV mode.
    if DEV_MODE:
        transcript = "This is DEV mode."
    else:
        transcript = transcribe(file_path, input_language)

    # Log the transcript together with the client's IP address and User-Agent
    # ('Unknown' when the header is missing).
    user_agent = request.headers.get('User-Agent', 'Unknown')
    save_to_csv(transcript, request.remote_addr, user_agent)
    return jsonify({'transcript': transcript})
@app.route('/translate', methods=['POST'])
def translate_audio():
    """Translate the posted text and convert the translation into speech.

    Expects a JSON body with the keys ``text``, ``input_language`` and
    ``output_language``.

    Returns:
        JSON with ``audio_url`` and ``translation``. When the output language
        is ``"auto"`` the ``translation`` field carries an explanatory message
        and ``audio_url`` is empty. A body that is not a JSON object or lacks
        a required key yields JSON ``{"error": ...}`` with status 400.
    """
    req_data = request.get_json(silent=True)
    required_keys = ('text', 'input_language', 'output_language')
    if not isinstance(req_data, dict) or any(key not in req_data for key in required_keys):
        return jsonify({
            'error': 'Expected JSON with text, input_language and output_language'
        }), 400

    # Speech synthesis needs a concrete target language.
    if req_data['output_language'] == 'auto':
        return jsonify({
            'audio_url': '',
            'translation': "The output language cannot be set to 'auto'"
        })

    if DEV_MODE:
        return jsonify({
            'audio_url': url_for('static', filename='audio/dev_audio.m4a'),
            'translation': "This is DEV mode."
        })

    # Translating the text
    translation = translate(
        req_data['text'],
        input_language=req_data['input_language'],
        output_language=req_data['output_language']
    )

    # Converting the translated text into speech. The UUID keeps two requests
    # arriving within the same second from overwriting each other's file.
    tts = gTTS(translation, lang=req_data['output_language'])
    timestamp = datetime.now().strftime("%Y-%m-%d-%H-%M-%S")
    filename = f"text2speech_{timestamp}_{uuid.uuid4()}.mp3"
    file_path = os.path.join(app.config['AUDIO_FOLDER'], filename)
    tts.save(file_path)
    wait_for_file(file_path)

    # Remove the session's previous audio file only after the new one exists,
    # so a failed synthesis does not discard the last good file.
    previous_audio = session.get('last_audio_file')
    if previous_audio and previous_audio != file_path:
        try:
            os.remove(previous_audio)
        except FileNotFoundError:
            # Already gone (e.g. cleaned up externally); nothing to do.
            pass

    # Storing the path of the last audio file in the session
    session['last_audio_file'] = file_path
    return jsonify({
        'audio_url': url_for('static', filename=f'audio/{filename}'),
        'translation': translation
    })
@app.route('/audio', methods=['GET'])
def get_last_audio():
    """Report the last audio file recorded in the current session.

    Returns JSON with a single ``audio_url`` key: the DEV placeholder audio in
    DEV mode, otherwise the session's ``last_audio_file`` value (an empty
    string when nothing has been stored yet).
    """
    if not DEV_MODE:
        # NOTE(review): this is the stored file path, not a url_for() URL like
        # the one /translate returns — confirm the client expects that.
        last_audio = session.get('last_audio_file', '')
        return jsonify({'audio_url': last_audio})

    dev_audio_url = url_for('static', filename='audio/dev_audio.m4a')
    return jsonify({'audio_url': dev_audio_url})
def transcribe(file_path, input_language):
    """Transcribe the audio file at *file_path* with OpenAI's whisper-1 model.

    Args:
        file_path: Path of the audio file to send.
        input_language: Language code passed to the model as ``language``, or
            ``"auto"`` to send no language at all.

    Returns:
        The ``'text'`` entry of the transcription response.
    """
    # Only pass a language when one was explicitly chosen.
    language_args = {} if input_language == "auto" else {"language": input_language}
    with open(file_path, "rb") as recording:
        response = openai.Audio.transcribe("whisper-1", recording, **language_args)
    return response['text']
def translate(text, input_language, output_language):
    """Translate *text* into *output_language* using OpenAI's gpt-3.5-turbo.

    Args:
        text: The transcript to translate.
        input_language: Language of *text*, or ``"auto"`` when it is not
            known; in that case the prompt names no source language.
        output_language: Language the model is asked to translate into.

    Returns:
        The message content of the first completion choice.
    """
    if input_language == "auto":
        # No source language to name. The previous wording for this branch
        # left a stray quote and a missing space in the prompt.
        language_clause = (
            f"You will receive a transcribe, and you have to translate in '{output_language}'. "
        )
    else:
        language_clause = (
            f"You will receive a transcribe in '{input_language}', "
            f"and you have to translate in '{output_language}'. "
        )

    system_prompt = (
        "You are a helpful AI translator. "
        + language_clause
        + "The translation should be in spoken language. Only reply with the direct translation."
    )
    messages = [
        {"role": "system", "content": system_prompt},
        {"role": "user", "content": f"Transcribe: {text}\nTranslation:"},
    ]

    translation = openai.ChatCompletion.create(
        model="gpt-3.5-turbo",
        messages=messages
    )
    return translation['choices'][0]['message']['content']
def wait_for_file(file_path, timeout=30.0, poll_interval=0.1):
    """Block until *file_path* exists and is non-empty.

    Args:
        file_path: Path of the file to wait for.
        timeout: Maximum number of seconds to wait before giving up.
        poll_interval: Seconds to sleep between checks.

    Raises:
        TimeoutError: If the file is still missing or empty after *timeout*
            seconds. Without this bound a file that never receives content
            would stall the caller forever.
    """
    deadline = time.monotonic() + timeout
    while True:
        try:
            # A single getsize() call covers both "exists" and "non-empty"
            # and avoids a race between two separate checks.
            if os.path.getsize(file_path) > 0:
                return
        except OSError:
            # Not there (yet), or it vanished between polls; keep waiting.
            pass
        if time.monotonic() >= deadline:
            raise TimeoutError(
                f"File {file_path!r} was not ready after {timeout} seconds"
            )
        time.sleep(poll_interval)
def save_to_csv(transcript, ip_address, user_agent, filename="history/transcripts.csv"):
    """Append one transcript record to a CSV history file.

    Each row holds the current time, the transcript, the IP address and the
    User-Agent, in that order.

    Args:
        transcript: Transcribed text to record.
        ip_address: IP address of the requesting client.
        user_agent: User-Agent string of the requesting client.
        filename: CSV file to append to; its parent directory is created
            when it does not exist yet.
    """
    # A bare filename has no directory part, and os.makedirs('') would raise.
    # exist_ok avoids a race with another request creating the directory.
    directory = os.path.dirname(filename)
    if directory:
        os.makedirs(directory, exist_ok=True)

    # Transcripts can be in any language, so write UTF-8 explicitly instead of
    # relying on the platform's default encoding.
    with open(filename, mode='a', newline='', encoding='utf-8') as file:
        writer = csv.writer(file)
        writer.writerow([datetime.now(), transcript, ip_address, user_agent])
# Start Flask's built-in HTTPS server, but only when this file is executed
# directly and local app testing (DEV_MODE_APP) is switched on.
# NOTE(review): debug=True together with host="0.0.0.0" makes the interactive
# debugger reachable from other machines — confirm this is never used outside
# a trusted network.
if __name__ == '__main__' and DEV_MODE_APP:
    app.run(
        ssl_context=('cert.pem', 'key.pem'),
        debug=True,
        host="0.0.0.0",
        port=5009,
    )