-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathmain.py
executable file
·54 lines (53 loc) · 2.02 KB
/
main.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
import boto3
import os
import setting
from time import sleep
import requests
import re
# Batch-transcribe audio files stored in an S3 bucket with Amazon Transcribe.
#
# For every object in the bucket whose key ends in "-w.wav", start a
# transcription job, poll until it finishes, and save the transcript JSON to
# output/<key>.txt. Errors on one file are printed and the script moves on to
# the next file (best effort).

# AWS request configuration.
my_bucket_name = 'mytestset'

# Amazon Transcribe reads its input from S3, so the audio files must already
# be stored in the bucket named above.
# NOTE: despite its name, ``s3_client`` is a boto3 *resource*, not a client.
s3_client = boto3.resource(
    's3',
    aws_access_key_id=setting.AWSAccessKeyId,
    aws_secret_access_key=setting.AWSSecretKey)
testset = s3_client.Bucket(my_bucket_name)

# ASR (speech-to-text) client.
asr_client = boto3.client('transcribe', region_name='us-east-2',
                          aws_access_key_id=setting.AWSAccessKeyId,
                          aws_secret_access_key=setting.AWSSecretKey)

# Only a subset of the bucket is transcribed: keys ending in "-w.wav".
# Compiled once here because the pattern does not change between iterations.
_WANTED_KEY_RE = re.compile(r"-w\.wav$")

# Loop through the files in S3.
for file in testset.objects.all():
    file_name = file.key
    file_uri = 's3://' + my_bucket_name + '/' + file_name
    print(file_name)

    # Skip everything that is not one of the selected recordings.
    if _WANTED_KEY_RE.search(file_name) is None:
        continue

    try:
        # Start a transcription job; the S3 key doubles as the job name.
        asr_client.start_transcription_job(
            TranscriptionJobName=file_name,
            Media={'MediaFileUri': file_uri},
            MediaFormat='wav',
            LanguageCode='fa-IR')

        # Poll every 15 seconds until the job reaches a terminal state.
        while True:
            response = asr_client.get_transcription_job(
                TranscriptionJobName=file_name)
            job = response['TranscriptionJob']
            status = job['TranscriptionJobStatus']

            if status == 'COMPLETED':
                sharable_url = job['Transcript']['TranscriptFileUri']
                # A timeout keeps a stalled download from hanging the whole
                # batch; raise_for_status() prevents an HTTP error body from
                # being saved as if it were a transcript.
                output = requests.get(sharable_url, timeout=60)
                output.raise_for_status()

                output_path = 'output/' + file_name + '.txt'
                # Create the target directory on first use so a fresh
                # checkout does not fail on every write.
                os.makedirs(os.path.dirname(output_path), exist_ok=True)
                # ``with`` guarantees the file is flushed and closed.
                with open(output_path, 'wb') as transcript_file:
                    transcript_file.write(output.content)
                break

            if status == 'FAILED':
                print('failed and skiped')
                # Surface the service-provided reason when there is one.
                failure_reason = job.get('FailureReason')
                if failure_reason:
                    print(failure_reason)
                break

            # Still queued or in progress: report and wait before re-polling.
            print(file_name)
            print(status)
            sleep(15)
    except Exception as e:
        # Deliberate best effort: report which file failed and continue with
        # the next object instead of aborting the whole batch.
        print(file_name, e)