-
Notifications
You must be signed in to change notification settings - Fork 0
/
app.py
90 lines (70 loc) · 2.72 KB
/
app.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
import pytube
import requests
import os
from langchain import PromptTemplate, LLMChain
from langchain.chat_models import ChatOpenAI
from bardapi import Bard
from dotenv import find_dotenv, load_dotenv
import openai
# Load variables from the .env file located by find_dotenv() before the
# os.getenv() lookups below read them.
load_dotenv(find_dotenv())
# API key for the openai client; os.getenv returns None if OPENAI_API_KEY is unset.
openai.api_key = os.getenv("OPENAI_API_KEY")
# Token handed to the Bard client; None if BARD_API_KEY is unset.
BARD_API_KEY=os.getenv("BARD_API_KEY")
# Module-level Bard client, used by check_facts() to query each fact.
bard = Bard(token=BARD_API_KEY)
# Extract audio from YouTube
def get_audio(video_url, output_path="audios", filename="audio.mp3"):
    """Download the first audio-only stream of a YouTube video.

    Args:
        video_url: URL of the YouTube video to fetch.
        output_path: Directory the file is written to. Defaults to "audios".
        filename: Name of the written file. Defaults to "audio.mp3".

    Returns:
        The value returned by the stream's ``download`` call (pytube
        documents this as the path of the written file).

    Raises:
        ValueError: If the video exposes no audio-only stream.
    """
    print("-----GETTING AUDIO-----")
    youtube_video = pytube.YouTube(video_url)
    # first() gives None for an empty query, so the failure can be reported
    # clearly instead of surfacing as a bare IndexError from indexing [0].
    audio_stream = youtube_video.streams.filter(only_audio=True).first()
    if audio_stream is None:
        raise ValueError(f"No audio-only stream available for {video_url}")
    saved_path = audio_stream.download(output_path=output_path, filename=filename)
    print("-----GETTING AUDIO DONE-----")
    return saved_path
# Audio to text
def get_text(filename):
    """Transcribe an audio file with the OpenAI "whisper-1" model.

    Args:
        filename: Path of the audio file to transcribe.

    Returns:
        The object returned by ``openai.Audio.transcribe``, unchanged.

    Raises:
        OSError: If the file cannot be opened (e.g. FileNotFoundError).
    """
    print("-----GENERATING TRANSCRIPT-----")
    # The context manager closes the handle even when the API call raises;
    # the previous bare open() left it open for the life of the process.
    with open(filename, "rb") as audio_file:
        transcript = openai.Audio.transcribe("whisper-1", audio_file)
    print("-----TRANSCRIPT GENERATED-----")
    return transcript
def get_facts(transcript):
    """Ask gpt-3.5-turbo to extract the top ten facts from a transcript.

    Args:
        transcript: Value substituted for the ``{transcript}`` placeholder
            of the prompt.

    Returns:
        The chain's prediction, which is also printed before being returned.
    """
    print("-----EXTRACTING FACTS-----")
    # Prompt assembled line by line; "{transcript}" is its only placeholder.
    prompt_text = (
        "\n"
        "Given this text. Extract top ten facts from it.;\n"
        "CONTEXT: {transcript}\n"
        "FACTS:\n"
    )
    fact_prompt = PromptTemplate(
        template=prompt_text,
        input_variables=["transcript"],
    )
    chat_model = ChatOpenAI(model_name="gpt-3.5-turbo", temperature=1)
    chain = LLMChain(llm=chat_model, prompt=fact_prompt, verbose=True)
    extracted = chain.predict(transcript=transcript)
    print("Here are the facts", extracted)
    return extracted
def organize_facts(facts):
    """Split a block of text into one fact record per non-blank line.

    Args:
        facts: Text with one fact per line.

    Returns:
        A list of ``{"description": <line>}`` dicts in input order, with
        surrounding whitespace removed from each line. Blank and
        whitespace-only lines are dropped so that no empty "fact" is
        produced.
    """
    stripped_lines = (line.strip() for line in facts.splitlines())
    return [{"description": line} for line in stripped_lines if line]
def check_facts(organized_facts):
    """Ask Bard whether each fact is correct and print the answers.

    For every record with a non-blank description, the function sends a
    yes/no verification query (asking for source URLs) through the
    module-level ``bard`` client. It then prints the record, the
    ``'content'`` field of the reply, and a separator line.

    Args:
        organized_facts: Iterable of dicts that each carry a
            ``'description'`` string.

    Returns:
        None. Results are only printed.
    """
    print("-----CHECKING FACTS NOW-----")
    for fact in organized_facts:
        fact_to_check = fact['description']
        if not fact_to_check.strip():
            # Nothing to verify: skip instead of spending a Bard request on
            # an empty statement.
            continue
        query = (
            "\n"
            "Answer in yes or no. Check if the following statement is factually correct. "
            "Check years, names, locations, historical events, numbers, or any other named entity. "
            f"{fact_to_check} ?\n"
            "Also cite sources for your answer by mentioning URLs.\n"
        )
        answer = bard.get_answer(query)['content']
        print(fact)
        print(answer)
        print("------")
def main(video_url="https://www.youtube.com/watch?v=KaWihejcGcM"):
    """Run the whole pipeline for one video.

    Downloads the audio, transcribes "audios/audio.mp3", extracts facts
    from the transcript, splits them into records and checks each one.

    Args:
        video_url: URL of the YouTube video to process.
    """
    get_audio(video_url)
    transcription = get_text("audios/audio.mp3")
    facts = get_facts(transcription)
    organized_facts = organize_facts(facts)
    check_facts(organized_facts)


# Guarded so that importing this module does not trigger a download and
# a series of API calls; running the file as a script behaves as before.
if __name__ == "__main__":
    main()