From c83e7f8928abafa7b6c903c8d0c87a1ea8fdcca3 Mon Sep 17 00:00:00 2001
From: Sameer Kulkarni
Date: Wed, 21 Aug 2024 12:02:38 +0530
Subject: [PATCH] interim check-in: update multi-tenant RAG app with chat
 history

---
 .envrc                                        |  4 +--
 Dockerfile                                    |  1 +
 app.py                                        |  8 +++---
 k8s-manifests/deploy.yaml                     |  2 ++
 multi_tenant_rag.py                           | 25 ++++++++++++++++---
 .../multi_tenant_rag_prompt_template.tmpl     | 15 +++++++++++
 6 files changed, 46 insertions(+), 9 deletions(-)
 create mode 100644 templates/multi_tenant_rag_prompt_template.tmpl

diff --git a/.envrc b/.envrc
index 73d6bb0..e009bf5 100644
--- a/.envrc
+++ b/.envrc
@@ -1,10 +1,10 @@
 export TGI_HOST=192.168.0.203
 export TGI_PORT=80
-export TEI_HOST=192.168.0.202
+export TEI_HOST=192.168.0.204
 export TEI_PORT=80
 export RERANKER_HOST=192.168.0.205
 export RERANKER_PORT=80
-export VECTORDB_HOST=192.168.0.204
+export VECTORDB_HOST=192.168.0.207
 export VECTORDB_PORT=8000
 export STOP_TOKEN="<|endoftext|>"
 export HUGGINGFACEHUB_API_TOKEN="$(cat ~/.hf_token)" #Replace with your own Hugging Face API token
diff --git a/Dockerfile b/Dockerfile
index 05cc563..615319c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,7 @@
 COPY .streamlit/** /app/.streamlit/
 COPY templates/** /app/templates/
 COPY app.py multi_tenant_rag.py requirements.txt tei_rerank.py /app/
+RUN pip install --upgrade pip
 RUN pip install --no-cache-dir -r requirements.txt
 
 EXPOSE 8051
diff --git a/app.py b/app.py
index 93c545e..7ff5a04 100644
--- a/app.py
+++ b/app.py
@@ -122,9 +122,9 @@ def setup_huggingface_embeddings():
     )
     return embedder
 
-def load_prompt_and_system_ins():
+def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmpl", template=None):
     #prompt = hub.pull("hwchase17/react-chat")
-    prompt = PromptTemplate.from_file("templates/prompt_template.tmpl")
+    prompt = PromptTemplate.from_file(template_file_path)
 
     # Set up prompt template
     template = """
@@ -192,12 +192,12 @@ def insert_embeddings(self, chunks, chroma_embedding_function, embedder, batch_s
         print("Embeddings inserted\n")
         return db
 
-    def query_docs(self, model, question, vector_store, prompt):
+    def query_docs(self, model, question, vector_store, prompt, chat_history):
         retriever = vector_store.as_retriever(
             search_type="similarity", search_kwargs={"k": 4}
         )
         rag_chain = (
-            {"context": retriever | format_docs, "question": RunnablePassthrough()}
+            {"context": retriever | format_docs, "chat_history": lambda _: chat_history, "question": RunnablePassthrough()}
             | prompt
             | model
             | StrOutputParser()
diff --git a/k8s-manifests/deploy.yaml b/k8s-manifests/deploy.yaml
index 8d58330..4a8308b 100644
--- a/k8s-manifests/deploy.yaml
+++ b/k8s-manifests/deploy.yaml
@@ -60,6 +60,8 @@ spec:
         - name: empty
           mountPath: /tmp/
       dnsPolicy: ClusterFirst
+      nodeSelector:
+        kubernetes.io/hostname: infracloud02
       restartPolicy: Always
       schedulerName: default-scheduler
      securityContext: {}
diff --git a/multi_tenant_rag.py b/multi_tenant_rag.py
index d4492d5..0d385e3 100644
--- a/multi_tenant_rag.py
+++ b/multi_tenant_rag.py
@@ -3,7 +3,7 @@
 from streamlit_authenticator.utilities import RegisterError, LoginError
 import os
 from langchain_community.vectorstores.chroma import Chroma
-from app import setup_chroma_client, setup_chroma_embedding_function
+from app import setup_chroma_client, setup_chroma_embedding_function, load_prompt_and_system_ins
 from app import setup_huggingface_embeddings, setup_huggingface_endpoint
 from app import RAG
 from langchain import hub
@@ -67,6 +67,22 @@ def main():
     user_id = st.session_state['username']
 
     client = setup_chroma_client()
+    # Set up prompt template
+    template = """
+    Based on the retrieved context, respond with an accurate answer.
+
+    Be concise and always provide accurate, specific, and relevant information.
+    """
+
+    prompt, system_instructions = load_prompt_and_system_ins(template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", template=template)
+
+    chat_history = st.session_state.get(
+        "chat_history", [{"role": "system", "content": system_instructions.content}]
+    )
+
+    for message in chat_history[1:]:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
 
     if user_id:
 
@@ -77,7 +93,7 @@ def main():
         rag = MultiTenantRAG(user_id, llm, embeddings, collection.name, client)
 
-        prompt = hub.pull("rlm/rag-prompt")
+        # prompt = hub.pull("rlm/rag-prompt")
 
         if uploaded_file:
             document = rag.load_documents(uploaded_file)
 
@@ -93,11 +109,14 @@ def main():
 
         if question := st.chat_input("Chat with your doc"):
             st.chat_message("user").markdown(question)
+            chat_history.append({"role": "user", "content": question})
             with st.spinner():
                 answer = rag.query_docs(model=llm,
                                         question=question,
                                         vector_store=vectorstore,
-                                        prompt=prompt)
+                                        prompt=prompt,
+                                        chat_history=chat_history)
+                print("####\n#### Answer received by querying docs: " + answer + "\n####")
                 st.chat_message("assistant").markdown(answer)
 
 if __name__ == "__main__":
diff --git a/templates/multi_tenant_rag_prompt_template.tmpl b/templates/multi_tenant_rag_prompt_template.tmpl
new file mode 100644
index 0000000..b348107
--- /dev/null
+++ b/templates/multi_tenant_rag_prompt_template.tmpl
@@ -0,0 +1,15 @@
+InSightful is a bot developed by InfraCloud Technologies.
+
+InSightful is used to assist users in analyzing & getting insights from the uploaded PDF files.
+
+InSightful is designed to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions about the uploaded PDF files and the data within.
+
+Use the following pieces of retrieved context to answer the question.
+
+Begin!
+
+Previous conversation history:
+{chat_history}
+
+New question from user: {question}
+Context: {context}
\ No newline at end of file
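
For reference, a minimal sketch (not part of the patch) of how the `chat_history` value gets wired into the LCEL chain that `query_docs` builds. LangChain coerces the dict piped into the prompt to a RunnableParallel, whose values must be Runnables or callables, so a bare list fails coercion with a TypeError and has to be wrapped in a lambda; `format_docs` is assumed to be the helper already defined in app.py and is restated here only to keep the sketch self-contained:

    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough

    def format_docs(docs):
        # Join the retrieved chunks into a single context string
        return "\n\n".join(doc.page_content for doc in docs)

    def build_rag_chain(retriever, prompt, model, chat_history):
        def render_history(history):
            # Flatten the [{"role": ..., "content": ...}] dicts to plain text
            # so the template's {chat_history} placeholder receives a string
            return "\n".join(f"{m['role']}: {m['content']}" for m in history)

        return (
            {
                "context": retriever | format_docs,
                # The lambda ignores the chain input and returns a snapshot
                # of the history; a bare list here would fail dict coercion
                "chat_history": lambda _: render_history(chat_history),
                "question": RunnablePassthrough(),
            }
            | prompt
            | model
            | StrOutputParser()
        )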
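
The patch reads the history with `st.session_state.get(...)` but never writes it back, and only user turns are appended, so after Streamlit's rerun on the next interaction the default list is rebuilt from scratch. A sketch of one way to persist both sides of the conversation, assuming the same message-dict shape (the write-back and the assistant-turn append are assumptions about intended behavior, not something the patch does); `system_instructions`, `rag`, `llm`, `vectorstore`, and `prompt` are the objects already set up in `main()`:

    import streamlit as st

    # Store the list in session_state once; .get() alone never saves the
    # default, so appends made before the next rerun would be lost
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = [
            {"role": "system", "content": system_instructions.content}
        ]
    chat_history = st.session_state["chat_history"]

    if question := st.chat_input("Chat with your doc"):
        st.chat_message("user").markdown(question)
        chat_history.append({"role": "user", "content": question})
        answer = rag.query_docs(model=llm, question=question,
                                vector_store=vectorstore, prompt=prompt,
                                chat_history=chat_history)
        st.chat_message("assistant").markdown(answer)
        # Record the assistant turn too, so the next question can see it
        chat_history.append({"role": "assistant", "content": answer})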
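
And a small sketch of how the new template file is consumed, assuming `PromptTemplate.from_file` infers the input variables from the `{chat_history}`, `{question}`, and `{context}` placeholders (the sample values are invented for illustration):

    from langchain_core.prompts import PromptTemplate

    prompt = PromptTemplate.from_file("templates/multi_tenant_rag_prompt_template.tmpl")
    print(prompt.input_variables)  # e.g. ['chat_history', 'context', 'question']

    rendered = prompt.format(
        chat_history="user: What does the report cover?\nassistant: Q2 revenue.",
        question="Summarize the risks section.",
        context="(retrieved document chunks go here)",
    )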