Skip to content

Commit

Permalink
pushing tested code for blog
Browse files Browse the repository at this point in the history
  • Loading branch information
jeffvestal committed Apr 11, 2023
1 parent 2daa916 commit 93df752
Show file tree
Hide file tree
Showing 4 changed files with 127 additions and 0 deletions.
5 changes: 5 additions & 0 deletions .gitignore
Original file line number Diff line number Diff line change
@@ -0,0 +1,5 @@
*replit*
*pyc
.config
venv
main-notokenlimit.py
11 changes: 11 additions & 0 deletions .streamlit/config.toml
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
[logger]

# Level of logging: 'error', 'warning', 'info', or 'debug'.
# Default: 'info'
level = "info"

[server]

# If false, will attempt to open a browser window on start.
# Default: false unless (1) we are on a Linux box where DISPLAY is unset, or (2) we are running in the Streamlit Atom plugin.
headless = true
108 changes: 108 additions & 0 deletions elasticdocs_gpt.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,108 @@
import os
import streamlit as st
import openai
from elasticsearch import Elasticsearch

# This code is part of an Elastic Blog showing how to combine
# Elasticsearch's search relevancy power with
# OpenAI's GPT's Question Answering power


# Required Environment Variables
# openai_api - OpenAI API Key
# cloud_id - Elastic Cloud Deployment ID
# cloud_user - Elasticsearch Cluster User
# cloud_pass - Elasticsearch User Password

# OpenAI API key is read from the environment; raises KeyError at import time
# if `openai_api` is unset, which fails fast before any request is made.
openai.api_key = os.environ['openai_api']
# NOTE(review): this module-level `model` is never referenced below —
# chat_gpt() defaults to "gpt-3.5-turbo", not this pinned "-0301" snapshot.
# Confirm which model is intended and either pass this in or remove it.
model = "gpt-3.5-turbo-0301"

# Connect to Elastic Cloud cluster
# Build a client for an Elastic Cloud deployment.
def es_connect(cid, user, passwd):
    """Return an Elasticsearch client for the given cloud ID, authenticated
    with HTTP basic credentials."""
    return Elasticsearch(cloud_id=cid, http_auth=(user, passwd))

# Search the Elasticsearch index and return body and URL of the top result
def search(query_text):
    """Run a hybrid (BM25 + kNN) search over the Elastic docs index.

    Connects using the `cloud_id` / `cloud_user` / `cloud_pass` environment
    variables (KeyError if any is missing).

    Returns:
        (body_content, url) of the single best hit, or (None, None) when the
        index returns no hits (previously this raised IndexError).
    """
    cid = os.environ['cloud_id']
    cp = os.environ['cloud_pass']
    cu = os.environ['cloud_user']
    es = es_connect(cid, cu, cp)

    # BM25 leg: match on title. The `exists` filter restricts candidates to
    # documents that actually carry a title vector, so both legs of the
    # hybrid search score the same document population.
    query = {
        "bool": {
            "must": [{
                "match": {
                    "title": {
                        "query": query_text,
                        "boost": 1
                    }
                }
            }],
            "filter": [{
                "exists": {
                    "field": "title-vector"
                }
            }]
        }
    }

    # kNN leg: the query text is embedded server-side by the deployed
    # sentence-transformers model. boost 24 (vs. 1 above) weights the vector
    # score heavily over BM25 in the combined ranking.
    knn = {
        "field": "title-vector",
        "k": 1,
        "num_candidates": 20,
        "query_vector_builder": {
            "text_embedding": {
                "model_id": "sentence-transformers__all-distilroberta-v1",
                "model_text": query_text
            }
        },
        "boost": 24
    }

    fields = ["title", "body_content", "url"]
    index = 'search-elastic-docs'
    resp = es.search(index=index,
                     query=query,
                     knn=knn,
                     fields=fields,
                     size=1,
                     source=False)

    # Robustness fix: an empty result set previously crashed with IndexError
    # on hits[0]; surface "no answer" to the caller instead.
    hits = resp['hits']['hits']
    if not hits:
        return None, None

    top_fields = hits[0]['fields']
    body = top_fields['body_content'][0]
    url = top_fields['url'][0]

    return body, url

def truncate_text(text, max_tokens):
    """Clip *text* to at most *max_tokens* whitespace-separated words.

    Note: "tokens" here are plain words, a rough stand-in for model tokens.
    When no clipping is needed the original string is returned untouched
    (original whitespace preserved); otherwise the kept words are re-joined
    with single spaces.
    """
    words = text.split()
    return text if len(words) <= max_tokens else ' '.join(words[:max_tokens])

# Generate a response from ChatGPT based on the given prompt
def chat_gpt(prompt, model="gpt-3.5-turbo", max_tokens=1024, max_context_tokens=4000, safety_margin=5):
    """Ask the chat model to answer *prompt*, clipped to the context window.

    Args:
        prompt: user prompt (typically question + retrieved doc text).
        model: chat model name passed to the OpenAI API.
        max_tokens: completion budget reserved out of the context window.
        max_context_tokens: assumed total context window for the model.
        safety_margin: extra tokens held back to absorb word-vs-token drift.

    Returns:
        The assistant message content as a string.
    """
    # Clip the prompt so prompt + completion fits the context window.
    # truncate_text counts words, not real tokens, hence the safety margin.
    truncated_prompt = truncate_text(prompt, max_context_tokens - max_tokens - safety_margin)

    # Fix: max_tokens was reserved in the budget above but never sent to the
    # API, so the completion length was not actually capped and the
    # context-window arithmetic did not hold. Pass it through.
    response = openai.ChatCompletion.create(
        model=model,
        max_tokens=max_tokens,
        messages=[
            {"role": "system", "content": "You are a helpful assistant."},
            {"role": "user", "content": truncated_prompt}
        ]
    )

    return response["choices"][0]["message"]["content"]


st.title("ElasticDocs GPT")

# Main chat form: a single text box plus a submit button.
with st.form("chat_form"):
    user_query = st.text_input("You: ")
    send_clicked = st.form_submit_button("Send")

# On submit: retrieve the best-matching doc, then ask the model to answer
# using only that doc, and render the answer with a source link.
if send_clicked:
    doc_body, doc_url = search(user_query)
    prompt = f"Answer this question: {user_query}\nUsing only the information from this Elastic Doc: {doc_body}"
    reply = chat_gpt(prompt)
    st.write(f"ChatGPT: {reply.strip()}\n\nDocs: {doc_url}")
3 changes: 3 additions & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
streamlit
openai
elasticsearch

0 comments on commit 93df752

Please sign in to comment.