From 8f2870c0b31d414663e8d15679b2743b45fdf617 Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Wed, 6 Sep 2023 16:34:33 +1000 Subject: [PATCH 1/4] bedrock --- kendra_retriever_samples/app.py | 17 ++- .../kendra_chat_bedrock.py | 106 ++++++++++++++++++ .../kendra_chat_falcon_40b.py | 11 +- .../kendra_chat_llama_2.py | 16 +-- kendra_retriever_samples/requirements.txt | 2 +- 5 files changed, 129 insertions(+), 23 deletions(-) create mode 100644 kendra_retriever_samples/kendra_chat_bedrock.py diff --git a/kendra_retriever_samples/app.py b/kendra_retriever_samples/app.py index c20ab6e..651167b 100644 --- a/kendra_retriever_samples/app.py +++ b/kendra_retriever_samples/app.py @@ -8,6 +8,7 @@ import kendra_chat_open_ai as openai import kendra_chat_falcon_40b as falcon40b import kendra_chat_llama_2 as llama2 +import kendra_chat_bedrock as bedrock USER_ICON = "images/user-icon.png" AI_ICON = "images/ai-icon.png" @@ -21,6 +22,17 @@ 'llama2' : 'Llama 2' } +#function to read a properties file and create environment variables +def read_properties_file(filename): + import os + import re + with open(filename, 'r') as f: + for line in f: + m = re.match(r'^\s*(\w+)\s*=\s*(.*)\s*$', line) + if m: + os.environ[m.group(1)] = m.group(2) + + # Check if the user ID is already stored in the session state if 'user_id' in st.session_state: user_id = st.session_state['user_id'] @@ -51,10 +63,13 @@ elif (sys.argv[1] == 'llama2'): st.session_state['llm_app'] = llama2 st.session_state['llm_chain'] = llama2.build_chain() + elif (sys.argv[1] == 'bedrock'): + st.session_state['llm_app'] = bedrock + st.session_state['llm_chain'] = bedrock.build_chain() else: raise Exception("Unsupported LLM: ", sys.argv[1]) else: - raise Exception("Usage: streamlit run app.py ") + raise Exception("Usage: streamlit run app.py ") if 'chat_history' not in st.session_state: st.session_state['chat_history'] = [] diff --git a/kendra_retriever_samples/kendra_chat_bedrock.py b/kendra_retriever_samples/kendra_chat_bedrock.py new file mode 100644 index 0000000..142daf3 --- /dev/null +++ b/kendra_retriever_samples/kendra_chat_bedrock.py @@ -0,0 +1,106 @@ +# from aws_langchain.kendra import AmazonKendraRetriever #custom library +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import ConversationalRetrievalChain +from langchain.prompts import PromptTemplate +from langchain.llms.bedrock import Bedrock +import sys +import os + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +MAX_HISTORY_LENGTH = 5 + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + credentials_profile_name = os.environ['AWS_PROFILE'] + print(region) + + llm = Bedrock( + credentials_profile_name=credentials_profile_name, + region_name = region, + model_id="amazon.titan-tg1-large" + ) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id,top_k=5,region_name=region) + + + prompt_template = """ + + Human: This is a friendly conversation between a human and an AI. + The AI is talkative and provides specific details from its context but limits it to 240 tokens. + If the AI does not know the answer to a question, it truthfully says it + does not know. + + Assistant: OK, got it, I'll be a talkative truthful AI assistant. 
+ + Human: Here are a few documents in tags: + + {context} + + Based on the above documents, provide a detailed answer for, {question} + Answer "don't know" if not present in the document. + +Assistant: + """ + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + + condense_qa_template = """ + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}) + + # qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) + return qa + + +def run_chain(chain, prompt: str, history=[]): + return chain({"question": prompt, "chat_history": history}) + + +if __name__ == "__main__": + chat_history = [] + qa = build_chain() + print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) + print(">", end=" ", flush=True) + for query in sys.stdin: + if (query.strip().lower().startswith("new search:")): + query = query.strip().lower().replace("new search:","") + chat_history = [] + elif (len(chat_history) == MAX_HISTORY_LENGTH): + chat_history.pop(0) + result = run_chain(qa, query, chat_history) + chat_history.append((query, result["answer"])) + print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) + if 'source_documents' in result: + print(bcolors.OKGREEN + 'Sources:') + for d in result['source_documents']: + print(d.metadata['source']) + print(bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) + print(">", end=" ", flush=True) + print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) diff --git a/kendra_retriever_samples/kendra_chat_falcon_40b.py b/kendra_retriever_samples/kendra_chat_falcon_40b.py index 72c4bac..d187e8d 100644 --- a/kendra_retriever_samples/kendra_chat_falcon_40b.py +++ b/kendra_retriever_samples/kendra_chat_falcon_40b.py @@ -30,14 +30,12 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - prompt = prompt[:1023] + input_str = json.dumps({"inputs": prompt, "parameters": model_kwargs}) - print("input_str", input_str) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: response_json = json.loads(output.read().decode("utf-8")) - print(response_json) return response_json[0]["generated_text"] content_handler = ContentHandler() @@ -47,7 +45,6 @@ def transform_output(self, output: bytes) -> str: region_name=region, model_kwargs={ "temperature": 0.8, - "max_length": 10000, "max_new_tokens": 512, "do_sample": True, "top_p": 0.9, @@ -57,13 +54,9 @@ def transform_output(self, output: bytes) -> str: content_handler=content_handler ) - retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) + retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region, top_k=2) prompt_template = """ - The following is a friendly conversation between a human and an AI. - The AI is talkative and provides lots of specific details from its context. 
- If the AI does not know the answer to a question, it truthfully says it - does not know. {context} Instruction: Based on the above documents, provide a detailed answer for, {question} Answer "don't know" if not present in the document. diff --git a/kendra_retriever_samples/kendra_chat_llama_2.py b/kendra_retriever_samples/kendra_chat_llama_2.py index 7c2d527..86d96eb 100644 --- a/kendra_retriever_samples/kendra_chat_llama_2.py +++ b/kendra_retriever_samples/kendra_chat_llama_2.py @@ -30,19 +30,13 @@ class ContentHandler(LLMContentHandler): accepts = "application/json" def transform_input(self, prompt: str, model_kwargs: dict) -> bytes: - input_str = json.dumps({"inputs": - [[ - #{"role": "system", "content": ""}, - {"role": "user", "content": prompt}, - ]], - **model_kwargs + input_str = json.dumps({"inputs": [[{"role": "user", "content": prompt},]], + "parameters" : model_kwargs }) - print(input_str) return input_str.encode('utf-8') def transform_output(self, output: bytes) -> str: - response_json = json.loads(output.read().decode("utf-8")) - + response_json = json.loads(output.read().decode("utf-8")) return response_json[0]['generation']['content'] content_handler = ContentHandler() @@ -50,7 +44,7 @@ def transform_output(self, output: bytes) -> str: llm=SagemakerEndpoint( endpoint_name=endpoint_name, region_name=region, - model_kwargs={"max_new_tokens": 1000, "top_p": 0.9,"temperature":0.6}, + model_kwargs={"max_new_tokens": 1500, "top_p": 0.8,"temperature":0.6}, endpoint_kwargs={"CustomAttributes":"accept_eula=true"}, content_handler=content_handler, ) @@ -69,7 +63,6 @@ def transform_output(self, output: bytes) -> str: PROMPT = PromptTemplate( template=prompt_template, input_variables=["context", "question"], ) - condense_qa_template = """ Given the following conversation and a follow up question, rephrase the follow up question to be a standalone question. 
@@ -90,7 +83,6 @@ def transform_output(self, output: bytes) -> str: return qa def run_chain(chain, prompt: str, history=[]): - print(prompt) return chain({"question": prompt, "chat_history": history}) if __name__ == "__main__": diff --git a/kendra_retriever_samples/requirements.txt b/kendra_retriever_samples/requirements.txt index 8c93dfc..f58490b 100644 --- a/kendra_retriever_samples/requirements.txt +++ b/kendra_retriever_samples/requirements.txt @@ -1,4 +1,4 @@ -langchain==0.0.263 +langchain==0.0.279 boto3>=1.28.27 openai anthropic From 0ba0b20fea7519a4837fe6514ee29ffc6f574239 Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Tue, 12 Sep 2023 14:13:21 +1000 Subject: [PATCH 2/4] bedrock --- kendra_retriever_samples/README.md | 3 + kendra_retriever_samples/app.py | 18 ++- .../kendra_chat_bedrock_claude.py | 112 ++++++++++++++++++ .../kendra_chat_bedrock_claudev2.py | 112 ++++++++++++++++++ ...edrock.py => kendra_chat_bedrock_titan.py} | 4 +- .../kendra_chat_llama_2.py | 1 + 6 files changed, 245 insertions(+), 5 deletions(-) create mode 100644 kendra_retriever_samples/kendra_chat_bedrock_claude.py create mode 100644 kendra_retriever_samples/kendra_chat_bedrock_claudev2.py rename kendra_retriever_samples/{kendra_chat_bedrock.py => kendra_chat_bedrock_titan.py} (98%) diff --git a/kendra_retriever_samples/README.md b/kendra_retriever_samples/README.md index 2110deb..24e072a 100644 --- a/kendra_retriever_samples/README.md +++ b/kendra_retriever_samples/README.md @@ -40,6 +40,7 @@ Before you run the sample, you need to deploy a Large Language Model (or get an | Flan XXL | FLAN_XXL_ENDPOINT | huggingface-text2text-flan-t5-xxl | flanxxl | | Falcon 40B instruct | FALCON_40B_ENDPOINT | huggingface-llm-falcon-40b-instruct-bf16 | falcon40b | | Llama2 70B instruct | LLAMA_2_ENDPOINT | meta-textgeneration-llama-2-70b-f | llama2 | +| Bedrock | BEDROCK_MODEL |amazon.titan-tg1-large | bedrock| after deploying the LLM, set up environment variables for kendra id, aws_region and the endpoint name (or the API key for an external provider) @@ -50,7 +51,9 @@ You can use commands as below to set the environment variables. 
Only set the env ```bash export AWS_REGION="" +export AWS_PROFILE=bedrock export KENDRA_INDEX_ID="" + export FLAN_XL_ENDPOINT="" # only if you are using FLAN_XL export FLAN_XXL_ENDPOINT="" # only if you are using FLAN_XXL export FALCON_40B_ENDPOINT="" # only if you are using falcon as the endpoint diff --git a/kendra_retriever_samples/app.py b/kendra_retriever_samples/app.py index 651167b..1406cac 100644 --- a/kendra_retriever_samples/app.py +++ b/kendra_retriever_samples/app.py @@ -8,7 +8,11 @@ import kendra_chat_open_ai as openai import kendra_chat_falcon_40b as falcon40b import kendra_chat_llama_2 as llama2 -import kendra_chat_bedrock as bedrock +import kendra_chat_bedrock_titan as bedrock_titan +import kendra_chat_bedrock_claude as bedrock_claude +import kendra_chat_bedrock_claudev2 as bedrock_claudev2 + + USER_ICON = "images/user-icon.png" AI_ICON = "images/ai-icon.png" @@ -63,9 +67,15 @@ def read_properties_file(filename): elif (sys.argv[1] == 'llama2'): st.session_state['llm_app'] = llama2 st.session_state['llm_chain'] = llama2.build_chain() - elif (sys.argv[1] == 'bedrock'): - st.session_state['llm_app'] = bedrock - st.session_state['llm_chain'] = bedrock.build_chain() + elif (sys.argv[1] == 'bedrock_titan'): + st.session_state['llm_app'] = bedrock_titan + st.session_state['llm_chain'] = bedrock_titan.build_chain() + elif (sys.argv[1] == 'bedrock_claude'): + st.session_state['llm_app'] = bedrock_claude + st.session_state['llm_chain'] = bedrock_claude.build_chain() + elif (sys.argv[1] == 'bedrock_claudev2'): + st.session_state['llm_app'] = bedrock_claudev2 + st.session_state['llm_chain'] = bedrock_claudev2.build_chain() else: raise Exception("Unsupported LLM: ", sys.argv[1]) else: diff --git a/kendra_retriever_samples/kendra_chat_bedrock_claude.py b/kendra_retriever_samples/kendra_chat_bedrock_claude.py new file mode 100644 index 0000000..868dfe9 --- /dev/null +++ b/kendra_retriever_samples/kendra_chat_bedrock_claude.py @@ -0,0 +1,112 @@ +# from aws_langchain.kendra import AmazonKendraRetriever #custom library +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import ConversationalRetrievalChain +from langchain.prompts import PromptTemplate +from langchain.llms.bedrock import Bedrock +from langchain.chains.llm import LLMChain +import sys +import os + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +MAX_HISTORY_LENGTH = 5 + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + credentials_profile_name = os.environ['AWS_PROFILE'] + + print(credentials_profile_name) + + + llm = Bedrock( + credentials_profile_name=credentials_profile_name, + region_name = region, + model_kwargs={"max_tokens_to_sample":300,"temperature":1,"top_k":250,"top_p":0.999,"anthropic_version":"bedrock-2023-05-31"}, + model_id="anthropic.claude-v1" + ) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id,top_k=5,region_name=region) + + + prompt_template = """Human: This is a friendly conversation between a human and an AI. + The AI is talkative and provides specific details from its context but limits it to 240 tokens. + If the AI does not know the answer to a question, it truthfully says it + does not know. + + Assistant: OK, got it, I'll be a talkative truthful AI assistant. 
+ + Human: Here are a few documents in tags: + + {context} + + Based on the above documents, provide a detailed answer for, {question} + Answer "don't know" if not present in the document. + + Assistant: + """ + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + + condense_qa_template = """Human: + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question: + + Assistant:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}, + verbose=True) + + # qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) + return qa + + +def run_chain(chain, prompt: str, history=[]): + return chain({"question": prompt, "chat_history": history}) + + +if __name__ == "__main__": + chat_history = [] + qa = build_chain() + print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) + print(">", end=" ", flush=True) + for query in sys.stdin: + if (query.strip().lower().startswith("new search:")): + query = query.strip().lower().replace("new search:","") + chat_history = [] + elif (len(chat_history) == MAX_HISTORY_LENGTH): + chat_history.pop(0) + result = run_chain(qa, query, chat_history) + chat_history.append((query, result["answer"])) + print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) + if 'source_documents' in result: + print(bcolors.OKGREEN + 'Sources:') + for d in result['source_documents']: + print(d.metadata['source']) + print(bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) + print(">", end=" ", flush=True) + print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) diff --git a/kendra_retriever_samples/kendra_chat_bedrock_claudev2.py b/kendra_retriever_samples/kendra_chat_bedrock_claudev2.py new file mode 100644 index 0000000..b9e44bd --- /dev/null +++ b/kendra_retriever_samples/kendra_chat_bedrock_claudev2.py @@ -0,0 +1,112 @@ +# from aws_langchain.kendra import AmazonKendraRetriever #custom library +from langchain.retrievers import AmazonKendraRetriever +from langchain.chains import ConversationalRetrievalChain +from langchain.prompts import PromptTemplate +from langchain.llms.bedrock import Bedrock +from langchain.chains.llm import LLMChain +import sys +import os + +class bcolors: + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKCYAN = '\033[96m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + +MAX_HISTORY_LENGTH = 5 + +def build_chain(): + region = os.environ["AWS_REGION"] + kendra_index_id = os.environ["KENDRA_INDEX_ID"] + credentials_profile_name = os.environ['AWS_PROFILE'] + + print(credentials_profile_name) + + + llm = Bedrock( + credentials_profile_name=credentials_profile_name, + region_name = region, + model_kwargs={"max_tokens_to_sample":300,"temperature":1,"top_k":250,"top_p":0.999,"anthropic_version":"bedrock-2023-05-31"}, + model_id="anthropic.claude-v2" + ) + + retriever = AmazonKendraRetriever(index_id=kendra_index_id,top_k=5,region_name=region) + + + prompt_template = """Human: This is a friendly conversation between a human and an AI. + The AI is talkative and provides specific details from its context but limits it to 240 tokens. + If the AI does not know the answer to a question, it truthfully says it + does not know. + + Assistant: OK, got it, I'll be a talkative truthful AI assistant. + + Human: Here are a few documents in tags: + + {context} + + Based on the above documents, provide a detailed answer for, {question} + Answer "don't know" if not present in the document. + + Assistant: + """ + PROMPT = PromptTemplate( + template=prompt_template, input_variables=["context", "question"] + ) + + condense_qa_template = """Human: + Given the following conversation and a follow up question, rephrase the follow up question + to be a standalone question. + Chat History: + {chat_history} + Follow Up Input: {question} + Standalone question: + + Assistant:""" + standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + + + + qa = ConversationalRetrievalChain.from_llm( + llm=llm, + retriever=retriever, + condense_question_prompt=standalone_question_prompt, + return_source_documents=True, + combine_docs_chain_kwargs={"prompt":PROMPT}, + verbose=True) + + # qa = ConversationalRetrievalChain.from_llm(llm=llm, retriever=retriever, qa_prompt=PROMPT, return_source_documents=True) + return qa + + +def run_chain(chain, prompt: str, history=[]): + return chain({"question": prompt, "chat_history": history}) + + +if __name__ == "__main__": + chat_history = [] + qa = build_chain() + print(bcolors.OKBLUE + "Hello! How can I help you?" + bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." 
+ bcolors.ENDC) + print(">", end=" ", flush=True) + for query in sys.stdin: + if (query.strip().lower().startswith("new search:")): + query = query.strip().lower().replace("new search:","") + chat_history = [] + elif (len(chat_history) == MAX_HISTORY_LENGTH): + chat_history.pop(0) + result = run_chain(qa, query, chat_history) + chat_history.append((query, result["answer"])) + print(bcolors.OKGREEN + result['answer'] + bcolors.ENDC) + if 'source_documents' in result: + print(bcolors.OKGREEN + 'Sources:') + for d in result['source_documents']: + print(d.metadata['source']) + print(bcolors.ENDC) + print(bcolors.OKCYAN + "Ask a question, start a New search: or CTRL-D to exit." + bcolors.ENDC) + print(">", end=" ", flush=True) + print(bcolors.OKBLUE + "Bye" + bcolors.ENDC) diff --git a/kendra_retriever_samples/kendra_chat_bedrock.py b/kendra_retriever_samples/kendra_chat_bedrock_titan.py similarity index 98% rename from kendra_retriever_samples/kendra_chat_bedrock.py rename to kendra_retriever_samples/kendra_chat_bedrock_titan.py index 142daf3..39f0b31 100644 --- a/kendra_retriever_samples/kendra_chat_bedrock.py +++ b/kendra_retriever_samples/kendra_chat_bedrock_titan.py @@ -23,7 +23,9 @@ def build_chain(): region = os.environ["AWS_REGION"] kendra_index_id = os.environ["KENDRA_INDEX_ID"] credentials_profile_name = os.environ['AWS_PROFILE'] - print(region) + + print(credentials_profile_name) + llm = Bedrock( credentials_profile_name=credentials_profile_name, diff --git a/kendra_retriever_samples/kendra_chat_llama_2.py b/kendra_retriever_samples/kendra_chat_llama_2.py index 86d96eb..ad7d8ce 100644 --- a/kendra_retriever_samples/kendra_chat_llama_2.py +++ b/kendra_retriever_samples/kendra_chat_llama_2.py @@ -72,6 +72,7 @@ def transform_output(self, output: bytes) -> str: Follow Up Input: {question} Standalone question:""" standalone_question_prompt = PromptTemplate.from_template(condense_qa_template) + qa = ConversationalRetrievalChain.from_llm( llm=llm, From e728edbea65d6794f86ed572f26d4916bfb9002a Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Tue, 12 Sep 2023 15:41:25 +1000 Subject: [PATCH 3/4] latest models --- kendra_retriever_samples/README.md | 22 ++++++++++--------- kendra_retriever_samples/app.py | 2 +- .../kendra_chat_falcon_40b.py | 1 + .../kendra_chat_llama_2.py | 2 ++ 4 files changed, 16 insertions(+), 11 deletions(-) diff --git a/kendra_retriever_samples/README.md b/kendra_retriever_samples/README.md index 24e072a..ca176f4 100644 --- a/kendra_retriever_samples/README.md +++ b/kendra_retriever_samples/README.md @@ -40,7 +40,9 @@ Before you run the sample, you need to deploy a Large Language Model (or get an | Flan XXL | FLAN_XXL_ENDPOINT | huggingface-text2text-flan-t5-xxl | flanxxl | | Falcon 40B instruct | FALCON_40B_ENDPOINT | huggingface-llm-falcon-40b-instruct-bf16 | falcon40b | | Llama2 70B instruct | LLAMA_2_ENDPOINT | meta-textgeneration-llama-2-70b-f | llama2 | -| Bedrock | BEDROCK_MODEL |amazon.titan-tg1-large | bedrock| +| Bedrock Titan | None | | bedrock_titan| +| Bedrock Claude | None | | bedrock_claude| +| Bedrock Claude V2 | None | | bedrock_claudev2| after deploying the LLM, set up environment variables for kendra id, aws_region and the endpoint name (or the API key for an external provider) @@ -50,17 +52,17 @@ For example, for running the `kendra_chat_flan_xl.py` sample, these environment You can use commands as below to set the environment variables. Only set the environment variable for the provider that you are using. 
For example, if you are using Flan-xl only set the FLAN_XXL_ENDPOINT. There is no need to set the other Endpoints and keys. ```bash -export AWS_REGION="" -export AWS_PROFILE=bedrock -export KENDRA_INDEX_ID="" +export AWS_REGION= +export AWS_PROFILE= +export KENDRA_INDEX_ID= -export FLAN_XL_ENDPOINT="" # only if you are using FLAN_XL -export FLAN_XXL_ENDPOINT="" # only if you are using FLAN_XXL -export FALCON_40B_ENDPOINT="" # only if you are using falcon as the endpoint -export LLAMA_2_ENDPOINT="" #only if you are using llama2 as the endpoint +export FLAN_XL_ENDPOINT= # only if you are using FLAN_XL +export FLAN_XXL_ENDPOINT= # only if you are using FLAN_XXL +export FALCON_40B_ENDPOINT= # only if you are using falcon as the endpoint +export LLAMA_2_ENDPOINT= #only if you are using llama2 as the endpoint -export OPENAI_API_KEY="" # only if you are using OPENAI as the endpoint -export ANTHROPIC_API_KEY="" # only if you are using Anthropic as the endpoint +export OPENAI_API_KEY= # only if you are using OPENAI as the endpoint +export ANTHROPIC_API_KEY= # only if you are using Anthropic as the endpoint ``` diff --git a/kendra_retriever_samples/app.py b/kendra_retriever_samples/app.py index 1406cac..34613b9 100644 --- a/kendra_retriever_samples/app.py +++ b/kendra_retriever_samples/app.py @@ -79,7 +79,7 @@ def read_properties_file(filename): else: raise Exception("Unsupported LLM: ", sys.argv[1]) else: - raise Exception("Usage: streamlit run app.py ") + raise Exception("Usage: streamlit run app.py ") if 'chat_history' not in st.session_state: st.session_state['chat_history'] = [] diff --git a/kendra_retriever_samples/kendra_chat_falcon_40b.py b/kendra_retriever_samples/kendra_chat_falcon_40b.py index d187e8d..0262f68 100644 --- a/kendra_retriever_samples/kendra_chat_falcon_40b.py +++ b/kendra_retriever_samples/kendra_chat_falcon_40b.py @@ -80,6 +80,7 @@ def transform_output(self, output: bytes) -> str: retriever=retriever, condense_question_prompt=standalone_question_prompt, return_source_documents=True, + verbose =True, combine_docs_chain_kwargs={"prompt":PROMPT}) return qa diff --git a/kendra_retriever_samples/kendra_chat_llama_2.py b/kendra_retriever_samples/kendra_chat_llama_2.py index ad7d8ce..4dd111b 100644 --- a/kendra_retriever_samples/kendra_chat_llama_2.py +++ b/kendra_retriever_samples/kendra_chat_llama_2.py @@ -47,6 +47,7 @@ def transform_output(self, output: bytes) -> str: model_kwargs={"max_new_tokens": 1500, "top_p": 0.8,"temperature":0.6}, endpoint_kwargs={"CustomAttributes":"accept_eula=true"}, content_handler=content_handler, + ) retriever = AmazonKendraRetriever(index_id=kendra_index_id,region_name=region) @@ -80,6 +81,7 @@ def transform_output(self, output: bytes) -> str: condense_question_prompt=standalone_question_prompt, return_source_documents=True, combine_docs_chain_kwargs={"prompt":PROMPT}, + verbose=True ) return qa From 3dbc203261a3f957259080d6b49724c9fe30f872 Mon Sep 17 00:00:00 2001 From: Mithil Shah Date: Wed, 13 Sep 2023 09:58:23 +1000 Subject: [PATCH 4/4] bedrock --- kendra_retriever_samples/README.md | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/kendra_retriever_samples/README.md b/kendra_retriever_samples/README.md index ca176f4..2ad7536 100644 --- a/kendra_retriever_samples/README.md +++ b/kendra_retriever_samples/README.md @@ -30,6 +30,15 @@ If you are using Conda conda env create -f environment.yml ``` +### For Bedrock +If you are using Bedrock, make sure that you have a boto3 client with bedrock library and you use an AWS_PROFILE that has 
access to Bedrock.
+
+```
+wget https://xxxxx/Documentation/SDK/bedrock-python-sdk.zip
+unzip bedrock-python-sdk.zip
+pip install *.whl
+```
+
 ## Running samples
 Before you run the sample, you need to deploy a Large Language Model (or get an API key if you are using Anthropic or OpenAI). The samples in this repository have been tested on models deployed using SageMaker JumpStart. The model IDs for the LLMs are specified in the table below.
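For reference, a minimal end-to-end invocation of one of the Bedrock samples added in this patch series might look like the sketch below. The region, profile name, and index ID values are placeholder assumptions; `bedrock_claudev2` is one of the provider arguments that `app.py` accepts after these changes (alongside `bedrock_titan` and `bedrock_claude`).

```bash
# Placeholder values -- replace with your own region, AWS profile, and Kendra index ID.
export AWS_REGION=us-east-1
export AWS_PROFILE=bedrock
export KENDRA_INDEX_ID=<your-kendra-index-id>

# Launch the Streamlit app with one of the Bedrock providers registered in app.py.
streamlit run app.py bedrock_claudev2
```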