Commit c83e7f8

interim checkin: update multi tenant rag app with chat history

Sameer Kulkarni committed Aug 21, 2024
1 parent 0104be9 commit c83e7f8
Showing 6 changed files with 46 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .envrc
@@ -1,10 +1,10 @@
 export TGI_HOST=192.168.0.203
 export TGI_PORT=80
-export TEI_HOST=192.168.0.202
+export TEI_HOST=192.168.0.204
 export TEI_PORT=80
 export RERANKER_HOST=192.168.0.205
 export RERANKER_PORT=80
-export VECTORDB_HOST=192.168.0.204
+export VECTORDB_HOST=192.168.0.207
 export VECTORDB_PORT=8000
 export STOP_TOKEN="<|endoftext|>"
 export HUGGINGFACEHUB_API_TOKEN="$(cat ~/.hf_token)" #Replace with your own Hugging Face API token
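These variables are consumed by the app at startup; a minimal sketch of how the service endpoints might be read (assuming plain os.environ access; the URL variables below are illustrative, not from the repo):

    import os

    # Illustrative: build service base URLs from the .envrc variables.
    tgi_url = f"http://{os.environ['TGI_HOST']}:{os.environ['TGI_PORT']}"
    tei_url = f"http://{os.environ['TEI_HOST']}:{os.environ['TEI_PORT']}"
    vectordb_host = os.environ["VECTORDB_HOST"]
    vectordb_port = int(os.environ["VECTORDB_PORT"])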
1 change: 1 addition & 0 deletions Dockerfile
@@ -6,6 +6,7 @@ COPY .streamlit/** /app/.streamlit/
 COPY templates/** /app/templates/
 COPY app.py multi_tenant_rag.py requirements.txt tei_rerank.py /app/

+RUN pip install --upgrade pip
 RUN pip install --no-cache-dir -r requirements.txt

 EXPOSE 8051
8 changes: 4 additions & 4 deletions app.py
@@ -122,9 +122,9 @@ def setup_huggingface_embeddings():
     )
     return embedder

-def load_prompt_and_system_ins():
+def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmpl", template=None):
     #prompt = hub.pull("hwchase17/react-chat")
-    prompt = PromptTemplate.from_file("templates/prompt_template.tmpl")
+    prompt = PromptTemplate.from_file(template_file_path)

     # Set up prompt template
     template = """
@@ -192,12 +192,12 @@ def insert_embeddings(self, chunks, chroma_embedding_function, embedder, batch_s
         print("Embeddings inserted\n")
         return db

-    def query_docs(self, model, question, vector_store, prompt):
+    def query_docs(self, model, question, vector_store, prompt, chat_history):
         retriever = vector_store.as_retriever(
             search_type="similarity", search_kwargs={"k": 4}
         )
         rag_chain = (
-            {"context": retriever | format_docs, "question": RunnablePassthrough()}
+            {"context": retriever | format_docs, "chat_history": chat_history, "question": RunnablePassthrough()}
             | prompt
             | model
             | StrOutputParser()
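Note that LCEL coerces each value in the mapping to a runnable; a bare list such as chat_history is neither a runnable nor a callable and will typically raise a TypeError when the chain is built. A minimal sketch of one working wiring, mirroring the method body above (the format_chat_history helper is hypothetical, not in the repo):

    from langchain_core.runnables import RunnablePassthrough

    def format_chat_history(history):
        # Render prior turns into the {chat_history} slot of the prompt.
        return "\n".join(f"{m['role']}: {m['content']}" for m in history)

    rag_chain = (
        {
            "context": retriever | format_docs,
            # A lambda is callable, so LCEL can coerce it to a runnable.
            "chat_history": lambda _: format_chat_history(chat_history),
            "question": RunnablePassthrough(),
        }
        | prompt
        | model
        | StrOutputParser()
    )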
2 changes: 2 additions & 0 deletions k8s-manifests/deploy.yaml
@@ -60,6 +60,8 @@ spec:
         - name: empty
           mountPath: /tmp/
       dnsPolicy: ClusterFirst
+      nodeSelector:
+        kubernetes.io/hostname: infracloud02
       restartPolicy: Always
       schedulerName: default-scheduler
       securityContext: {}
25 changes: 22 additions & 3 deletions multi_tenant_rag.py
@@ -3,7 +3,7 @@
 from streamlit_authenticator.utilities import RegisterError, LoginError
 import os
 from langchain_community.vectorstores.chroma import Chroma
-from app import setup_chroma_client, setup_chroma_embedding_function
+from app import setup_chroma_client, setup_chroma_embedding_function, load_prompt_and_system_ins
 from app import setup_huggingface_embeddings, setup_huggingface_endpoint
 from app import RAG
 from langchain import hub
@@ -67,6 +67,22 @@ def main():
     user_id = st.session_state['username']

     client = setup_chroma_client()
+    # Set up prompt template
+    template = """
+    Based on the retrieved context, respond with an accurate answer.
+    Be concise and always provide accurate, specific, and relevant information.
+    """
+
+    prompt, system_instructions = load_prompt_and_system_ins(template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", template=template)
+
+    chat_history = st.session_state.get(
+        "chat_history", [{"role": "system", "content": system_instructions.content}]
+    )
+
+    for message in chat_history[1:]:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
     if user_id:

@@ -77,7 +93,7 @@ def main():

         rag = MultiTenantRAG(user_id, llm, embeddings, collection.name, client)

-        prompt = hub.pull("rlm/rag-prompt")
+        # prompt = hub.pull("rlm/rag-prompt")

         if uploaded_file:
             document = rag.load_documents(uploaded_file)
@@ -93,11 +109,14 @@

         if question := st.chat_input("Chat with your doc"):
             st.chat_message("user").markdown(question)
+            chat_history.append({"role": "user", "content": question})
             with st.spinner():
                 answer = rag.query_docs(model=llm,
                                         question=question,
                                         vector_store=vectorstore,
-                                        prompt=prompt)
+                                        prompt=prompt,
+                                        chat_history=chat_history)
+            print("####\n#### Answer received by querying docs: " + answer + "\n####")
             st.chat_message("assistant").markdown(answer)

 if __name__ == "__main__":
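The hunk above appends only the user turn to chat_history; a minimal sketch of completing the round trip so the conversation survives Streamlit reruns (assuming the same session-state key used in main()):

    # After rendering the answer, record the assistant turn and persist
    # the history so the next rerun replays the full conversation.
    chat_history.append({"role": "assistant", "content": answer})
    st.session_state["chat_history"] = chat_history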
15 changes: 15 additions & 0 deletions templates/multi_tenant_rag_prompt_template.tmpl
@@ -0,0 +1,15 @@
+InSightful is a bot developed by InfraCloud Technologies.
+
+InSightful is used to assist users in analyzing and getting insights from uploaded PDF files.
+
+InSightful is designed to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions of the uploaded PDF files and the data within them.
+
+Use the following pieces of retrieved context to answer the question.
+
+Begin!
+
+Previous conversation history:
+{chat_history}
+
+New question from user: {question}
+Context: {context}
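When this template is loaded through load_prompt_and_system_ins, the {chat_history}, {question}, and {context} placeholders are bound at query time. A minimal illustration (the values shown are hypothetical):

    from langchain_core.prompts import PromptTemplate

    prompt = PromptTemplate.from_file("templates/multi_tenant_rag_prompt_template.tmpl")
    text = prompt.format(
        chat_history="user: hi\nassistant: hello",  # hypothetical prior turns
        question="What does the uploaded report conclude?",
        context="...retrieved document chunks...",
    )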
