Commit c83e7f8

interim checkin: update multi tenant rag app with chat history

Sameer Kulkarni committed Aug 21, 2024
1 parent 0104be9 commit c83e7f8
Showing 6 changed files with 46 additions and 9 deletions.
4 changes: 2 additions & 2 deletions .envrc
@@ -1,10 +1,10 @@
 export TGI_HOST=192.168.0.203
 export TGI_PORT=80
-export TEI_HOST=192.168.0.202
+export TEI_HOST=192.168.0.204
 export TEI_PORT=80
 export RERANKER_HOST=192.168.0.205
 export RERANKER_PORT=80
-export VECTORDB_HOST=192.168.0.204
+export VECTORDB_HOST=192.168.0.207
 export VECTORDB_PORT=8000
 export STOP_TOKEN="<|endoftext|>"
 export HUGGINGFACEHUB_API_TOKEN="$(cat ~/.hf_token)" #Replace with your own Hugging Face API token
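These variables are consumed by the app at startup; a minimal sketch of how the service endpoints might be read (assuming plain os.environ access; the URL variables below are illustrative, not from the repo):

    import os

    # Illustrative: build service base URLs from the .envrc variables.
    tgi_url = f"http://{os.environ['TGI_HOST']}:{os.environ['TGI_PORT']}"
    tei_url = f"http://{os.environ['TEI_HOST']}:{os.environ['TEI_PORT']}"
    vectordb_host = os.environ["VECTORDB_HOST"]
    vectordb_port = int(os.environ["VECTORDB_PORT"])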
1 change: 1 addition & 0 deletions Dockerfile
@@ -6,6 +6,7 @@ COPY .streamlit/** /app/.streamlit/
 COPY templates/** /app/templates/
 COPY app.py multi_tenant_rag.py requirements.txt tei_rerank.py /app/

+RUN pip install --upgrade pip
 RUN pip install --no-cache-dir -r requirements.txt

 EXPOSE 8051
8 changes: 4 additions & 4 deletions app.py
@@ -122,9 +122,9 @@ def setup_huggingface_embeddings():
     )
     return embedder

-def load_prompt_and_system_ins():
+def load_prompt_and_system_ins(template_file_path="templates/prompt_template.tmpl", template=None):
     #prompt = hub.pull("hwchase17/react-chat")
-    prompt = PromptTemplate.from_file("templates/prompt_template.tmpl")
+    prompt = PromptTemplate.from_file(template_file_path)

     # Set up prompt template
     template = """
@@ -192,12 +192,12 @@ def insert_embeddings(self, chunks, chroma_embedding_function, embedder, batch_s
         print("Embeddings inserted\n")
         return db

-    def query_docs(self, model, question, vector_store, prompt):
+    def query_docs(self, model, question, vector_store, prompt, chat_history):
         retriever = vector_store.as_retriever(
             search_type="similarity", search_kwargs={"k": 4}
         )
         rag_chain = (
-            {"context": retriever | format_docs, "question": RunnablePassthrough()}
+            {"context": retriever | format_docs, "chat_history": chat_history, "question": RunnablePassthrough()}
             | prompt
             | model
             | StrOutputParser()
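Note that LCEL coerces each value in the mapping to a runnable; a bare list such as chat_history is neither a runnable nor a callable and will typically raise a TypeError when the chain is built. A minimal sketch of one working wiring, mirroring the method body above (the format_chat_history helper is hypothetical, not in the repo):

    from langchain_core.runnables import RunnablePassthrough

    def format_chat_history(history):
        # Render prior turns into the {chat_history} slot of the prompt.
        return "\n".join(f"{m['role']}: {m['content']}" for m in history)

    rag_chain = (
        {
            "context": retriever | format_docs,
            # A lambda is callable, so LCEL can coerce it to a runnable.
            "chat_history": lambda _: format_chat_history(chat_history),
            "question": RunnablePassthrough(),
        }
        | prompt
        | model
        | StrOutputParser()
    )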
2 changes: 2 additions & 0 deletions k8s-manifests/deploy.yaml
@@ -60,6 +60,8 @@ spec:
         - name: empty
           mountPath: /tmp/
       dnsPolicy: ClusterFirst
+      nodeSelector:
+        kubernetes.io/hostname: infracloud02
       restartPolicy: Always
       schedulerName: default-scheduler
       securityContext: {}
25 changes: 22 additions & 3 deletions multi_tenant_rag.py
@@ -3,7 +3,7 @@
 from streamlit_authenticator.utilities import RegisterError, LoginError
 import os
 from langchain_community.vectorstores.chroma import Chroma
-from app import setup_chroma_client, setup_chroma_embedding_function
+from app import setup_chroma_client, setup_chroma_embedding_function, load_prompt_and_system_ins
 from app import setup_huggingface_embeddings, setup_huggingface_endpoint
 from app import RAG
 from langchain import hub
@@ -67,6 +67,22 @@ def main():
     user_id = st.session_state['username']

     client = setup_chroma_client()
+    # Set up prompt template
+    template = """
+    Based on the retrieved context, respond with an accurate answer.
+    Be concise and always provide accurate, specific, and relevant information.
+    """
+
+    prompt, system_instructions = load_prompt_and_system_ins(template_file_path="templates/multi_tenant_rag_prompt_template.tmpl", template=template)
+
+    chat_history = st.session_state.get(
+        "chat_history", [{"role": "system", "content": system_instructions.content}]
+    )
+
+    for message in chat_history[1:]:
+        with st.chat_message(message["role"]):
+            st.markdown(message["content"])
+
     if user_id:

@@ -77,7 +93,7 @@ def main():

         rag = MultiTenantRAG(user_id, llm, embeddings, collection.name, client)

-        prompt = hub.pull("rlm/rag-prompt")
+        # prompt = hub.pull("rlm/rag-prompt")

         if uploaded_file:
             document = rag.load_documents(uploaded_file)
@@ -93,11 +109,14 @@

         if question := st.chat_input("Chat with your doc"):
             st.chat_message("user").markdown(question)
+            chat_history.append({"role": "user", "content": question})
             with st.spinner():
                 answer = rag.query_docs(model=llm,
                                         question=question,
                                         vector_store=vectorstore,
-                                        prompt=prompt)
+                                        prompt=prompt,
+                                        chat_history=chat_history)
+            print("####\n#### Answer received by querying docs: " + answer + "\n####")
             st.chat_message("assistant").markdown(answer)

 if __name__ == "__main__":
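The hunk above appends only the user turn to chat_history; a minimal sketch of completing the round trip so the conversation survives Streamlit reruns (assuming the same session-state key used in main()):

    # After rendering the answer, record the assistant turn and persist
    # the history so the next rerun replays the full conversation.
    chat_history.append({"role": "assistant", "content": answer})
    st.session_state["chat_history"] = chat_history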
15 changes: 15 additions & 0 deletions templates/multi_tenant_rag_prompt_template.tmpl
@@ -0,0 +1,15 @@
+InSightful is a bot developed by InfraCloud Technologies.
+
+InSightful is used to assist users in analyzing and getting insights from uploaded PDF files.
+
+InSightful is designed to assist with a wide range of tasks, from answering simple questions to providing in-depth explanations and discussions of the uploaded PDF files and the data within them.
+
+Use the following pieces of retrieved context to answer the question.
+
+Begin!
+
+Previous conversation history:
+{chat_history}
+
+New question from user: {question}
+Context: {context}
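When this template is loaded through load_prompt_and_system_ins, the {chat_history}, {question}, and {context} placeholders are bound at query time. A minimal illustration (the values shown are hypothetical):

    from langchain_core.prompts import PromptTemplate

    prompt = PromptTemplate.from_file("templates/multi_tenant_rag_prompt_template.tmpl")
    text = prompt.format(
        chat_history="user: hi\nassistant: hello",  # hypothetical prior turns
        question="What does the uploaded report conclude?",
        context="...retrieved document chunks...",
    )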
