-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathaudio_to_text.py
52 lines (47 loc) · 1.69 KB
/
audio_to_text.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
from pytube import YouTube
import pytube
import os
import pywhisper
from youtube_transcript_api import YouTubeTranscriptApi
import streamlit as st
import time
def transcribe_video_to_text1(
    url,
    output_file_path=r"c:\Users\benal\Langchain\Youtube\video.txt",
):
    """Download a YouTube video's audio track and transcribe it with pywhisper.

    The audio-only stream is saved as ``audio.mp3`` inside a ``Youtube``
    directory (relative to the current working directory), transcribed
    with the ``base`` model as English text, and the resulting text is
    written to ``output_file_path``.

    Args:
        url: URL of the YouTube video.
        output_file_path: Where the transcript is saved as UTF-8 text.
            Defaults to the location the original script used.

    Returns:
        The transcribed text, or ``None`` when pytube raises
        ``RegexMatchError`` while resolving the URL or its streams.
    """
    try:
        yt = YouTube(url)
        audio_stream = yt.streams.filter(only_audio=True).first()
        out_file = audio_stream.download(output_path="Youtube", filename="audio.mp3")
    except pytube.exceptions.RegexMatchError:
        # Kept from the original contract: a URL pytube cannot parse yields
        # no result instead of an exception.
        return None

    # Use the path reported by the download itself rather than joining it
    # onto a machine-specific base directory.
    audio_path = os.path.abspath(out_file)

    # os.startfile is only available on Windows; keep opening the audio in
    # the default player there and skip it elsewhere instead of crashing.
    if hasattr(os, "startfile"):
        os.startfile(audio_path)

    model = pywhisper.load_model("base")
    result = model.transcribe(audio_path, language='en', temperature=0)
    transcribed_text = result['text']

    with open(output_file_path, 'w', encoding='utf-8') as output_file:
        output_file.write(transcribed_text)
    return transcribed_text
def transcribe_video_to_text(
    url,
    output_file_path=r'c:\Users\benal\Langchain\youtube\video.txt',
):
    """Fetch the existing transcript of a YouTube video and save it as text.

    The transcript segments returned by ``YouTubeTranscriptApi`` are
    concatenated (each segment preceded by a single space), written to
    ``output_file_path`` and returned.

    Args:
        url: URL of the YouTube video.
        output_file_path: Where the transcript is saved as UTF-8 text.
            Defaults to the location the original script used.

    Returns:
        The transcript text, or ``None`` when pytube raises
        ``RegexMatchError`` for the URL. On any other failure a Streamlit
        error is shown, the function waits five seconds and asks Streamlit
        to rerun the script.
    """
    try:
        # Let pytube extract the id: splitting the URL on "=" picks the wrong
        # token as soon as the URL carries additional query parameters.
        yt = YouTube(url)
        segments = YouTubeTranscriptApi.get_transcript(yt.video_id)

        # Each segment is prefixed with a space, so the text starts with one,
        # exactly as the original concatenation loop produced.
        transcript_text = "".join(" " + segment['text'] for segment in segments)

        with open(output_file_path, 'w', encoding='utf-8') as output_file:
            output_file.write(transcript_text)
        return transcript_text
    except pytube.exceptions.RegexMatchError:
        # Kept from the original contract: an unparseable URL yields no result.
        return None
    except Exception:
        # Any other failure is reported to the user as a missing transcript.
        st.error('This Video has no Transcript. Please try another video', icon="🚨")
        time.sleep(5)
        st.rerun()
if __name__ == "__main__":
    # Script entry point: run the transcript fetch against the placeholder
    # URL; substitute a real video id before running.
    transcribe_video_to_text("https://www.youtube.com/watch?v=your_video_id")