Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Update App.py #60

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
108 changes: 53 additions & 55 deletions app.py
Original file line number Diff line number Diff line change
@@ -1,104 +1,102 @@
import streamlit as st
import streamlit as st
from dotenv import load_dotenv
from PyPDF2 import PdfReader
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings import OpenAIEmbeddings, HuggingFaceInstructEmbeddings
from langchain.vectorstores import FAISS
from langchain.chat_models import ChatOpenAI
from langchain_community.embeddings.openai import OpenAIEmbeddings
from langchain_community.embeddings.huggingface import HuggingFaceInstructEmbeddings
from langchain.vectorstores.faiss import FAISS
import torch
from langchain.llms.openai import OpenAI
from langchain.llms.huggingface_hub import HuggingFaceHub
from langchain.chat_models.openai import ChatOpenAI
from langchain.memory import ConversationBufferMemory
from langchain.chains import ConversationalRetrievalChain
from htmlTemplates import css, bot_template, user_template
from langchain.llms import HuggingFaceHub
from langchain.chains.conversational_retrieval.base import ConversationalRetrievalChain

def get_pdf_text(pdf_docs):
    """Concatenate the extracted text of every page of every uploaded PDF.

    Args:
        pdf_docs: iterable of uploaded file-like objects readable by PdfReader.

    Returns:
        A single string with the text of all pages appended in order.
    """
    text = ""
    for pdf in pdf_docs:
        pdf_reader = PdfReader(pdf)
        for page in pdf_reader.pages:
            # extract_text() may return None for image-only pages;
            # guard so concatenation never raises TypeError.
            text += page.extract_text() or ""
    return text



def get_text_chunks(text):
    """Split *text* into overlapping chunks suitable for embedding.

    Args:
        text: the full document text.

    Returns:
        List of chunk strings (~1000 chars each, 200-char overlap) split
        on newlines.
    """
    text_splitter = CharacterTextSplitter(
        separator="\n",
        chunk_size=1000,
        chunk_overlap=200,
        length_function=len,
    )
    chunks = text_splitter.split_text(text)
    return chunks


def get_vectorstore(text_chunks):
    """Embed text chunks locally and index them in a FAISS vector store.

    Args:
        text_chunks: list of text strings to embed.

    Returns:
        A FAISS vector store built from the embedded chunks.
    """
    # Alternative (paid API): embeddings = OpenAIEmbeddings()
    # Run the instructor embedding model on GPU when available, else CPU.
    device = "cuda:0" if torch.cuda.is_available() else "cpu"
    embeddings = HuggingFaceInstructEmbeddings(
        model_name="hkunlp/instructor-xl",
        model_kwargs={"device": device},
    )
    vector_store = FAISS.from_texts(texts=text_chunks, embedding=embeddings)
    return vector_store


def get_conversation_chain(vector_store):
    """Build a conversational retrieval chain over *vector_store*.

    Args:
        vector_store: a FAISS (or compatible) vector store exposing
            as_retriever().

    Returns:
        A ConversationalRetrievalChain with buffered chat history.
    """
    llm = ChatOpenAI()
    # Alternative (free): llm = HuggingFaceHub(
    #     repo_id="google/flan-t5-xxl",
    #     model_kwargs={"temperature": 0.5, "max_length": 512})
    memory = ConversationBufferMemory(
        memory_key='chat_history', return_messages=True)
    # Fixed typos from the original: as_retreiver -> as_retriever
    # (would raise AttributeError) and convesation_chain -> conversation_chain.
    conversation_chain = ConversationalRetrievalChain.from_llm(
        llm=llm,
        retriever=vector_store.as_retriever(),
        memory=memory,
    )
    return conversation_chain
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Should be return conversation_chain

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

That's fine — "convesation_chain" is used consistently in both places, so the code will still work,
but I recommend renaming it to "conversation_chain".



def handle_user_input(user_question):
    """Run *user_question* through the conversation chain and render the chat.

    Args:
        user_question: the question string typed by the user.

    Side effects:
        Updates st.session_state.chat_history and writes the rendered
        conversation to the Streamlit page. Assumes
        st.session_state.conversation has been set (see main()).
    """
    response = st.session_state.conversation({'question': user_question})
    st.session_state.chat_history = response['chat_history']

    # Fixed: enumerate must be *called* — enumerate[...] raises TypeError.
    # Messages alternate user (even index) / bot (odd index).
    for i, message in enumerate(st.session_state.chat_history):
        if i % 2 == 0:
            st.write(user_template.replace("{{MSG}}", message.content),
                     unsafe_allow_html=True)
        else:
            st.write(bot_template.replace("{{MSG}}", message.content),
                     unsafe_allow_html=True)

def main():
    """Entry point: configure the Streamlit page and wire up the PDF-chat UI."""
    load_dotenv()  # load API keys (OpenAI / HuggingFace) from a .env file
    st.set_page_config(page_title="Chat with multiple PDFs",
                       page_icon=":books:")

    st.write(css, unsafe_allow_html=True)

    # Streamlit reruns the script on every interaction; keep the chain and
    # history in session_state so they survive reruns.
    if "conversation" not in st.session_state:
        st.session_state.conversation = None
    if "chat_history" not in st.session_state:
        st.session_state.chat_history = None

    st.header("Chat with multiple PDFs :books:")
    # Fixed prompt: "Ask a question your documents" was missing "about".
    user_question = st.text_input("Ask a question about your documents:")
    if user_question:
        handle_user_input(user_question)
    # Removed leftover template-debug writes ("Hello robot"/"Hello Human")
    # and a stray no-op `st.session_state.conversation` expression.

    with st.sidebar:
        st.subheader("Your documents")
        pdf_docs = st.file_uploader(
            "Upload your PDFs here and click on 'Process'",
            accept_multiple_files=True)

        if st.button("Process"):
            with st.spinner("Processing"):
                # get pdf text
                raw_text = get_pdf_text(pdf_docs)

                # get the text chunks
                text_chunks = get_text_chunks(raw_text)

                # create vector store
                vector_store = get_vectorstore(text_chunks)

                # create conversation chain
                st.session_state.conversation = get_conversation_chain(
                    vector_store)



if __name__ == '__main__':
    # Run the app exactly once (the diff showed a duplicated main() call).
    main()