Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update App.py #60

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 53 additions & 55 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,102 @@
import streamlit as st
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores.faiss import FAISS
import torch
from langchain.llms.openai import OpenAI
from langchain.llms.huggingface_hub import HuggingFaceHub
from langchain.chat_models.openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain.llms import HuggingFaceHub
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of uploaded file-like objects readable by PdfReader.

    Returns:
        A single string with the text of all pages appended in order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages;
            # guard so concatenation never raises TypeError.
            text += page.extract_text() or ""
    return text



def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Args:
        text: the full document text.

    Returns:
        List of chunk strings (~1000 chars each, 200-char overlap) split
        on newlines.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks


def get_vectorstore(text_chunks):
    """Embed text chunks locally and index them in a FAISS vector store.

    Args:
        text_chunks: list of text strings to embed.

    Returns:
        A FAISS vector store built from the embedded chunks.
    """
    # Alternative (paid API): embeddings = OpenAIEmbeddings()
    # Run the instructor embedding model on GPU when available, else CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-xl",
        model_kwargs={"device": device},
    )
    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vector_store


def get_conversation_chain(vector_store):
    """Build a conversational retrieval chain over *vector_store*.

    Args:
        vector_store: a FAISS (or compatible) vector store exposing
            as_retriever().

    Returns:
        A ConversationalRetrievalChain with buffered chat history.
    """
    llm = ChatOpenAI()
    # Alternative (free): llm = HuggingFaceHub(
    #     repo_id="google/flan-t5-xxl",
    #     model_kwargs={"temperature": 0.5, "max_length": 512})
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    # Fixed typos from the original: as_retreiver -> as_retriever
    # (would raise AttributeError) and convesation_chain -> conversation_chain.
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )
    return conversation_chain
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be return conversation_chain

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine — "convesation_chain" is used consistently in both places, so the code will still work,
but I recommend renaming it to "conversation_chain".



def handle_user_input(user_question):
    """Run *user_question* through the conversation chain and render the chat.

    Args:
        user_question: the question string typed by the user.

    Side effects:
        Updates st.session_state.chat_history and writes the rendered
        conversation to the Streamlit page. Assumes
        st.session_state.conversation has been set (see main()).
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # Fixed: enumerate must be *called* — enumerate[...] raises TypeError.
    # Messages alternate user (even index) / bot (odd index).
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace("{{MSG}}", message.content),
                     unsafe_allow_html=True)
        else:
            st.write(bot_template.replace("{{MSG}}", message.content),
                     unsafe_allow_html=True)

def main():
    """Entry point: configure the Streamlit page and wire up the PDF-chat UI."""
    load_dotenv()  # load API keys (OpenAI / HuggingFace) from a .env file
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")

    st.write(css, unsafe_allow_html=True)

    # Streamlit reruns the script on every interaction; keep the chain and
    # history in session_state so they survive reruns.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    # Fixed prompt: "Ask a question your documents" was missing "about".
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_user_input(user_question)
    # Removed leftover template-debug writes ("Hello robot"/"Hello Human")
    # and a stray no-op `st.session_state.conversation` expression.

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True)

        if st.button("Process"):
            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)

                # create vector store
                vector_store = get_vectorstore(text_chunks)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vector_store)



if __name__ == '__main__':
    # Run the app exactly once (the diff showed a duplicated main() call).
    main()