From c83e7f8928abafa7b6c903c8d0c87a1ea8fdcca3 Mon Sep 17 00:00:00 2001
From: Sameer Kulkarni
Date: Wed, 21 Aug 2024 12:02:38 +0530
Subject: [PATCH] interim check-in: update multi-tenant RAG app with chat
 history

---
 .envrc                                        |  4 +--
 Dockerfile                                    |  1 +
 app.py                                        |  8 +++---
 k8s-manifests/deploy.yaml                     |  2 ++
 multi_tenant_rag.py                           | 25 ++++++++++++++++---
 .../multi_tenant_rag_prompt_template.tmpl     | 15 +++++++++++
 6 files changed, 46 insertions(+), 9 deletions(-)
 create mode 100644 templates/multi_tenant_rag_prompt_template.tmpl

diff --git a/.envrc b/.envrc
index 73d6bb0..e009bf5 100644
--- a/.envrc
+++ b/.envrc
@@ -1,10 +1,10 @@
 export TGI_HOST=192.168.0.203
 export TGI_PORT=80
-export TEI_HOST=192.168.0.202
+export TEI_HOST=192.168.0.204
 export TEI_PORT=80
 export RERANKER_HOST=192.168.0.205
 export RERANKER_PORT=80
-export VECTORDB_HOST=192.168.0.204
+export VECTORDB_HOST=192.168.0.207
 export VECTORDB_PORT=8000
 export STOP_TOKEN="<|endoftext|>"
 export HUGGINGFACEHUB_API_TOKEN="$(cat ~/.hf_token)" #Replace with your own Hugging Face API token
diff --git a/Dockerfile b/Dockerfile
index 05cc563..615319c 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -6,6 +6,7 @@
 COPY .streamlit/** /app/.streamlit/
 COPY templates/** /app/templates/
 COPY app.py multi_tenant_rag.py requirements.txt tei_rerank.py /app/
+RUN pip install --upgrade pip
 RUN pip install --no-cache-dir -r requirements.txt
 
 EXPOSE 8051
diff --git a/app.py b/app.py
index 93c545e..7ff5a04 100644
--- a/app.py
+++ b/app.py
@@ -122,9 +122,9 @@ def setup_huggingface_embeddings():
     )
     return embedder
 
-def load_prompt_and_system_ins():
+def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmpl", template=None):
     #prompt = hub.pull("hwchase17/react-chat")
-    prompt = PromptTemplate.from_file("templates/prompt_template.tmpl")
+    prompt = PromptTemplate.from_file(template_file_path)
 
     # Set up prompt template
     template = """
@@ -192,12 +192,12 @@ def insert_embeddings(self, chunks, chroma_embedding_function, embedder, batch_s
         print("Embeddings inserted\n")
         return db
 
-    def query_docs(self, model, question, vector_store, prompt):
+    def query_docs(self, model, question, vector_store, prompt, chat_history):
         retriever = vector_store.as_retriever(
             search_type="similarity", search_kwargs={"k": 4}
         )
         rag_chain = (
-            {"context": retriever | format_docs, "question": RunnablePassthrough()}
+            {"context": retriever | format_docs, "chat_history": lambda _: chat_history, "question": RunnablePassthrough()}
             | prompt
             | model
             | StrOutputParser()
diff --git a/k8s-manifests/deploy.yaml b/k8s-manifests/deploy.yaml
index 8d58330..4a8308b 100644
--- a/k8s-manifests/deploy.yaml
+++ b/k8s-manifests/deploy.yaml
@@ -60,6 +60,8 @@ spec:
         - name: empty
           mountPath: /tmp/
       dnsPolicy: ClusterFirst
+      nodeSelector:
+        kubernetes.io/hostname: infracloud02
       restartPolicy: Always
       schedulerName: default-scheduler
      securityContext: {}
diff --git a/multi_tenant_rag.py b/multi_tenant_rag.py
index d4492d5..0d385e3 100644
--- a/multi_tenant_rag.py
+++ b/multi_tenant_rag.py
@@ -3,7 +3,7 @@
 from streamlit_authenticator.utilities import RegisterError, LoginError
 import os
 from langchain_community.vectorstores.chroma import Chroma
-from app import setup_chroma_client, setup_chroma_embedding_function
+from app import setup_chroma_client, setup_chroma_embedding_function, load_prompt_and_system_ins
 from app import setup_huggingface_embeddings, setup_huggingface_endpoint
 from app import RAG
 from langchain import hub
@@ -67,6 +67,22 @@ def main():
     user_id = st.session_state['username']
 
     client = setup_chroma_client()
+    # Set up prompt template
+    template = """
+    Based on the retrieved context, respond with an accurate answer.
+
+    Be concise and always provide accurate, specific, and relevant information.
+    """
+
+    prompt, system_instructions = load_prompt_and_system_ins(template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", template=template)
+
+    chat_history = st.session_state.get(
+        "chat_history", [{"role": "system", "content": system_instructions.content}]
+    )
+
+    for message in chat_history[1:]:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
 
     if user_id:
 
@@ -77,7 +93,7 @@ def main():
         rag = MultiTenantRAG(user_id, llm, embeddings, collection.name, client)
 
-        prompt = hub.pull("rlm/rag-prompt")
+        # prompt = hub.pull("rlm/rag-prompt")
 
         if uploaded_file:
             document = rag.load_documents(uploaded_file)
 
@@ -93,11 +109,14 @@ def main():
 
         if question := st.chat_input("Chat with your doc"):
             st.chat_message("user").markdown(question)
+            chat_history.append({"role": "user", "content": question})
             with st.spinner():
                 answer = rag.query_docs(model=llm,
                                         question=question,
                                         vector_store=vectorstore,
-                                        prompt=prompt)
+                                        prompt=prompt,
+                                        chat_history=chat_history)
+                print("####\n#### Answer received by querying docs: " + answer + "\n####")
                 st.chat_message("assistant").markdown(answer)
 
 if __name__ == "__main__":
diff --git a/templates/multi_tenant_rag_prompt_template.tmpl b/templates/multi_tenant_rag_prompt_template.tmpl
new file mode 100644
index 0000000..b348107
--- /dev/null
+++ b/templates/multi_tenant_rag_prompt_template.tmpl
@@ -0,0 +1,15 @@
+InSightful is a bot developed by InfraCloud Technologies.
+
+InSightful is used to assist users in analyzing & getting insights from the uploaded PDF files.
+
+InSightful is designed to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions about the uploaded PDF files and the data within.
+
+Use the following pieces of retrieved context to answer the question.
+
+Begin!
+
+Previous conversation history:
+{chat_history}
+
+New question from user: {question}
+Context: {context}
\ No newline at end of file
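
For reference, a minimal sketch (not part of the patch) of how the `chat_history` value gets wired into the LCEL chain that `query_docs` builds. LangChain coerces the dict piped into the prompt to a RunnableParallel, whose values must be Runnables or callables, so a bare list fails coercion with a TypeError and has to be wrapped in a lambda; `format_docs` is assumed to be the helper already defined in app.py and is restated here only to keep the sketch self-contained:

    from langchain_core.output_parsers import StrOutputParser
    from langchain_core.runnables import RunnablePassthrough

    def format_docs(docs):
        # Join the retrieved chunks into a single context string
        return "\n\n".join(doc.page_content for doc in docs)

    def build_rag_chain(retriever, prompt, model, chat_history):
        def render_history(history):
            # Flatten the [{"role": ..., "content": ...}] dicts to plain text
            # so the template's {chat_history} placeholder receives a string
            return "\n".join(f"{m['role']}: {m['content']}" for m in history)

        return (
            {
                "context": retriever | format_docs,
                # The lambda ignores the chain input and returns a snapshot
                # of the history; a bare list here would fail dict coercion
                "chat_history": lambda _: render_history(chat_history),
                "question": RunnablePassthrough(),
            }
            | prompt
            | model
            | StrOutputParser()
        )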
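
The patch reads the history with `st.session_state.get(...)` but never writes it back, and only user turns are appended, so after Streamlit's rerun on the next interaction the default list is rebuilt from scratch. A sketch of one way to persist both sides of the conversation, assuming the same message-dict shape (the write-back and the assistant-turn append are assumptions about intended behavior, not something the patch does); `system_instructions`, `rag`, `llm`, `vectorstore`, and `prompt` are the objects already set up in `main()`:

    import streamlit as st

    # Store the list in session_state once; .get() alone never saves the
    # default, so appends made before the next rerun would be lost
    if "chat_history" not in st.session_state:
        st.session_state["chat_history"] = [
            {"role": "system", "content": system_instructions.content}
        ]
    chat_history = st.session_state["chat_history"]

    if question := st.chat_input("Chat with your doc"):
        st.chat_message("user").markdown(question)
        chat_history.append({"role": "user", "content": question})
        answer = rag.query_docs(model=llm, question=question,
                                vector_store=vectorstore, prompt=prompt,
                                chat_history=chat_history)
        st.chat_message("assistant").markdown(answer)
        # Record the assistant turn too, so the next question can see it
        chat_history.append({"role": "assistant", "content": answer})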
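
And a small sketch of how the new template file is consumed, assuming `PromptTemplate.from_file` infers the input variables from the `{chat_history}`, `{question}`, and `{context}` placeholders (the sample values are invented for illustration):

    from langchain_core.prompts import PromptTemplate

    prompt = PromptTemplate.from_file("templates/multi_tenant_rag_prompt_template.tmpl")
    print(prompt.input_variables)  # e.g. ['chat_history', 'context', 'question']

    rendered = prompt.format(
        chat_history="user: What does the report cover?\nassistant: Q2 revenue.",
        question="Summarize the risks section.",
        context="(retrieved document chunks go here)",
    )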