From f24c74d1130c907ddaf3a548897ed99ab208be94 Mon Sep 17 00:00:00 2001
From: Thumpudi Vishnu <87056972+vishnuthumpudi@users.noreply.github.com>
Date: Sat, 20 Apr 2024 07:19:57 +0530
Subject: [PATCH] Update App.py

---
 app.py | 108 ++++++++++++++++++++++++++++-----------------------------
 1 file changed, 53 insertions(+), 55 deletions(-)

diff --git a/app.py b/app.py
index cde7c608..c3502f85 100644
--- a/app.py
+++ b/app.py
@@ -1,104 +1,102 @@
-import streamlit as st
+import streamlit as st
 from dotenv import load_dotenv
 from PyPDF2 import PdfReader
 from langchain.text_splitter import CharacterTextSplitter
-from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
-from langchain.vectorstores import FAISS
-from langchain.chat_models import ChatOpenAI
+from langchain_community.embeddings.openai import OpenAIEmbeddings
+from langchain_community.embeddings.huggingface import HuggingFaceInstructEmbeddings
+from langchain.vectorstores.faiss import FAISS
+import torch
+from langchain.llms.openai import OpenAI
+from langchain.llms.huggingface_hub import HuggingFaceHub
+from langchain.chat_models.openai import ChatOpenAI
 from langchain.memory import ConversationBufferMemory
-from langchain.chains import ConversationalRetrievalChain
 from htmlTemplates import css, bot_template, user_template
-from langchain.llms import HuggingFaceHub
+from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain
 
 def get_pdf_text(pdf_docs):
-    text = ""
+    text = ""
     for pdf in pdf_docs:
         pdf_reader = PdfReader(pdf)
         for page in pdf_reader.pages:
             text += page.extract_text()
     return text
-
-
+
 def get_text_chunks(text):
     text_splitter = CharacterTextSplitter(
         separator="\n",
         chunk_size=1000,
         chunk_overlap=200,
-        length_function=len
-    )
+        length_function=len
+    )
     chunks = text_splitter.split_text(text)
     return chunks
 
-
 def get_vectorstore(text_chunks):
-    embeddings = OpenAIEmbeddings()
-    # embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl")
-    vectorstore = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
-    return vectorstore
+    # embeddings = OpenAIEmbeddings()
+    # run the local Instructor embedding model on the GPU when one is available
+    DEVICE = "cuda:0" if torch.cuda.is_available() else "cpu"
+    embeddings = HuggingFaceInstructEmbeddings(model_name="hkunlp/instructor-xl", model_kwargs={"device": DEVICE})
+    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
+    return vector_store
 
-
-def get_conversation_chain(vectorstore):
+def get_conversation_chain(vector_store):
     llm = ChatOpenAI()
-    # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
-
-    memory = ConversationBufferMemory(
-        memory_key='chat_history', return_messages=True)
-    conversation_chain = ConversationalRetrievalChain.from_llm(
+    # llm = HuggingFaceHub(repo_id="google/flan-t5-xxl", model_kwargs={"temperature":0.5, "max_length":512})
+    # keep the full chat history in memory so the chain can resolve follow-up questions
+    memory = ConversationBufferMemory(memory_key='chat_history', return_messages=True)
+    conversation_chain = ConversationalRetrievalChain.from_llm(
         llm=llm,
-        retriever=vectorstore.as_retriever(),
-        memory=memory
+        retriever=vector_store.as_retriever(),
+        memory=memory
     )
-    return conversation_chain
+    return conversation_chain
 
-
-def handle_userinput(user_question):
+def handle_user_input(user_question):
     response = st.session_state.conversation({'question': user_question})
     st.session_state.chat_history = response['chat_history']
-
-    for i, message in enumerate(st.session_state.chat_history):
+
+    # user and bot messages alternate in the stored history
+    for i, message in enumerate(st.session_state.chat_history):
         if i % 2 == 0:
-            st.write(user_template.replace(
-                "{{MSG}}", message.content), unsafe_allow_html=True)
+            st.write(user_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
         else:
-            st.write(bot_template.replace(
-                "{{MSG}}", message.content), unsafe_allow_html=True)
-
-
+            st.write(bot_template.replace("{{MSG}}", message.content), unsafe_allow_html=True)
+
 def main():
     load_dotenv()
-    st.set_page_config(page_title="Chat with multiple PDFs",
-                       page_icon=":books:")
+    st.set_page_config(page_title="Chat with multiple PDFs", page_icon=":books:")
+
     st.write(css, unsafe_allow_html=True)
-
+
     if "conversation" not in st.session_state:
         st.session_state.conversation = None
     if "chat_history" not in st.session_state:
         st.session_state.chat_history = None
-
     st.header("Chat with multiple PDFs :books:")
-    user_question = st.text_input("Ask a question about your documents:")
+    user_question = st.text_input("Ask a question about your documents:")
     if user_question:
-        handle_userinput(user_question)
+        handle_user_input(user_question)
+    st.write(user_template.replace("{{MSG}}", "Hello robot"), unsafe_allow_html=True)
+    st.write(bot_template.replace("{{MSG}}", "Hello Human"), unsafe_allow_html=True)
 
     with st.sidebar:
-        st.subheader("Your documents")
-        pdf_docs = st.file_uploader(
-            "Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+        st.subheader("Your Documents")
+        pdf_docs = st.file_uploader("Upload your PDFs here and click on 'Process'", accept_multiple_files=True)
+
         if st.button("Process"):
             with st.spinner("Processing"):
-                # get pdf text
+                # get pdf text
                 raw_text = get_pdf_text(pdf_docs)
-
-                # get the text chunks
+
+                # get the text chunks
                 text_chunks = get_text_chunks(raw_text)
-
-                # create vector store
-                vectorstore = get_vectorstore(text_chunks)
-
+
+                # create vector store
+                vector_store = get_vectorstore(text_chunks)
+
                 # create conversation chain
-                st.session_state.conversation = get_conversation_chain(
-                    vectorstore)
+                st.session_state.conversation = get_conversation_chain(vector_store)
+
 if __name__ == '__main__':
-    main()
+    main()