Merge pull request #2 from elastic/dev
Openai upgrade and tested wave1
derickson authored Dec 14, 2023
2 parents 57e8787 + 805669d commit 7e8f9b2
Showing 4 changed files with 53 additions and 46 deletions.
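
At a glance, this commit moves the workshop from the module-level openai.ChatCompletion.create call of openai 0.28.x to the client-based API of openai 1.x. Below is a minimal before/after sketch of that pattern, not the workshop's exact wiring: the OPENAI_API_KEY and OPENAI_API_BASE environment variables are the ones app.py reads, while the model name is just a placeholder.

import os
from openai import OpenAI  # openai>=1.0 ships a client class instead of module-level calls

# Old pattern (openai==0.28.1, removed in this commit):
#   completion = openai.ChatCompletion.create(
#       model="gpt-3.5-turbo",
#       max_tokens=100,
#       messages=[{"role": "user", "content": "Hello"}],
#   )
#   text = completion["choices"][0]["message"]["content"]

# New pattern (openai==1.3.9, used throughout this commit):
client = OpenAI(
    api_key=os.environ["OPENAI_API_KEY"],
    base_url=os.environ.get("OPENAI_API_BASE"),  # optional proxy endpoint, as in app.py
)
completion = client.chat.completions.create(
    model="gpt-3.5-turbo",  # placeholder model name
    max_tokens=100,
    messages=[{"role": "user", "content": "Hello"}],
)
text = completion.choices[0].message.content  # attribute access replaces dict indexing
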
7 changes: 4 additions & 3 deletions .devcontainer/requirements.txt
@@ -1,4 +1,3 @@
# Default libraries required/wanted for Jupyter and Streamlit
ipywidgets==8.1.1
matplotlib==3.8.1
numpy==1.23.5
@@ -8,12 +7,14 @@ torchvision==0.13.1
tqdm==4.64.0
streamlit==1.28.1
# Add custom workshop packages below:
openai==0.28.1
openai==1.3.9
elasticsearch==8.11.0
eland==8.11.0
transformers==4.35.0
sentence_transformers==2.2.2
python-dotenv==1.0.0
elastic-apm==6.19.0
inquirer==3.1.3
sentencepiece==0.1.99
sentencepiece==0.1.99
tiktoken==0.5.2
cohere==4.38
73 changes: 37 additions & 36 deletions notebooks/Session_1.ipynb
@@ -146,11 +146,10 @@
"outputs": [],
"source": [
"! pip install --upgrade pip\n",
"! pip install -qqq --no-cache-dir torch\n",
"! pip install -qqq transformers sentencepiece\n",
"! pip install -qqq xformers\n",
"! pip install -qqq python-dotenv\n",
"! pip install -qqq \"openai<1.0.0\" ## for later in the lab"
"! pip install -q --no-cache-dir torch\n",
"! pip install -q transformers sentencepiece\n",
"! pip install -q python-dotenv\n",
"! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9 ## for later in the lab"
]
},
{
@@ -332,6 +331,9 @@
"metadata": {},
"outputs": [],
"source": [
"## Let's play with something a little bigger that can do a text completion\n",
"## This is a 3 GB download and takes some RAM to run, but it works CPU only\n",
"\n",
"from transformers import pipeline\n",
"from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
"\n",
@@ -371,12 +373,15 @@
},
"outputs": [],
"source": [
"countries = [\"United Kingdom\",\n",
" \"France\",\n",
" \"People's Republic of China\",\n",
" \"United States\",\n",
" \"Ecuador\",\n",
" \"Faketopia\"]\n",
"countries = [\n",
" \"United Kingdom\",\n",
" \"France\",\n",
" \"People's Republic of China\",\n",
" \"United States\",\n",
" \"Ecuador\",\n",
" \"Freedonia\", ## high hallucination potential\n",
" \"Faketopia\" ## high hallucination potential\n",
" ]\n",
"\n",
"for country in countries:\n",
" input_text = f\"The capital of the {country} is\"\n",
@@ -408,7 +413,7 @@
},
"outputs": [],
"source": [
"prompt_text = \"The current Prime Minister of the united kingdom is \"\n",
"prompt_text = \"The current Prime Minister of the united kingdom is \" ## high stale data potential\n",
"output = llm_pipe(prompt_text)\n",
"completed_prompt = f\"\\033[94m{prompt_text}\\033[0m {output[0]['generated_text']}\"\n",
"print(completed_prompt)"
@@ -469,6 +474,7 @@
"source": [
"import os, secrets, requests\n",
"import openai\n",
"from openai import OpenAI\n",
"from requests.auth import HTTPBasicAuth\n",
"\n",
"#if using the Elastic AI proxy, then generate the correct API key\n",
@@ -492,27 +498,22 @@
"\n",
"# Call the OpenAI ChatCompletion API\n",
"def chatCompletion(messages):\n",
" if os.environ[\"ELASTIC_PROXY\"] == \"True\":\n",
" completion = openai.ChatCompletion.create(\n",
" model=openai.default_model,\n",
" max_tokens=100,\n",
" messages=messages\n",
" )\n",
" else:\n",
" completion = openai.ChatCompletion.create(\n",
" engine=openai.default_model,\n",
" max_tokens=100,\n",
" messages=messages\n",
" )\n",
" client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n",
" completion = client.chat.completions.create(\n",
" model=openai.default_model,\n",
" max_tokens=100,\n",
" messages=messages\n",
" )\n",
" return completion\n",
"\n",
"def chatWithGPT(prompt, print_full_json=False):\n",
" response_text = chatCompletion([{\"role\": \"user\", \"content\": prompt}])\n",
" completion = chatCompletion([{\"role\": \"user\", \"content\": prompt}])\n",
" response_text = completion.choices[0].message.content\n",
"\n",
" if print_full_json:\n",
" json_pretty(response_text)\n",
" print(completion.model_dump_json())\n",
"\n",
" return wrap_text(response_text.choices[0].message.content,70)\n",
" return wrap_text(response_text,70)\n",
"\n",
"## call it with the json debug output enabled\n",
"response = chatWithGPT(\"Hello, is ChatGPT online and working?\", print_full_json=True)\n",
@@ -584,18 +585,15 @@
"You are an unhelpful AI named Captain LLM_Beard that talks like a pirate in short responses.\n",
"You acknowledge the user's question but redirect all conversations towards your love of treasure.\n",
"\"\"\"\n",
"\n",
" response_text = chatCompletion(\n",
" [\n",
" completion = chatCompletion([\n",
" {\"role\": \"system\", \"content\": system_prompt},\n",
" {\"role\": \"user\", \"content\": prompt}\n",
" ]\n",
" )\n",
"\n",
" ])\n",
" response_text = completion.choices[0].message.content\n",
" if print_full_json:\n",
" json_pretty(response_text)\n",
" print(completion.model_dump_json())\n",
"\n",
" return wrap_text(response_text.choices[0].message.content,70)\n",
" return wrap_text(response_text,70)\n",
"\n",
"hold_a_conversation(pirateGPT)"
]
@@ -701,9 +699,12 @@
" concatenated_message = [system_prompt] + memory_buffer.peek()\n",
"\n",
" ## here is the request to the AI\n",
" completion = chatCompletion(concatenated_message)\n",
"\n",
" completion = chatCompletion(concatenated_message)\n",
" response_text = completion.choices[0].message.content\n",
" if print_full_json:\n",
" print(completion.json())\n",
"\n",
"\n",
" ## don't forget to add the repsonse to the conversation memory\n",
" memory_buffer.enqueue({\"role\":\"assistant\", \"content\":response_text})\n",
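
The Session_1 hunk above builds each request from a memory_buffer object with enqueue and peek methods; the buffer's own implementation sits outside this diff. A minimal sketch of what such a FIFO conversation memory could look like follows; the class name, size limit, and method bodies are assumptions, not code from the repo.

from collections import deque

class ConversationMemory:
    # Hypothetical FIFO buffer of chat messages; the oldest turns drop off automatically.
    def __init__(self, max_messages=10):
        self._messages = deque(maxlen=max_messages)

    def enqueue(self, message):
        # message is a dict like {"role": "user", "content": "..."}
        self._messages.append(message)

    def peek(self):
        # Return the buffered turns without consuming them.
        return list(self._messages)

memory_buffer = ConversationMemory(max_messages=10)
memory_buffer.enqueue({"role": "user", "content": "Hello"})
recent_turns = memory_buffer.peek()
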
3 changes: 2 additions & 1 deletion notebooks/Session_2.ipynb
@@ -45,7 +45,8 @@
},
"outputs": [],
"source": [
"! pip install -q streamlit \"openai<1.0.0\" elasticsearch elastic-apm inquirer python-dotenv\n",
"! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n",
"! pip install -q streamlit elasticsearch elastic-apm inquirer python-dotenv\n",
"\n",
"import os, inquirer, re, secrets, requests\n",
"import streamlit as st\n",
16 changes: 10 additions & 6 deletions streamlit/app.py
@@ -1,10 +1,12 @@
import os
import streamlit as st
import openai
from openai import OpenAI
from elasticsearch import Elasticsearch
from string import Template
import elasticapm


# Configure OpenAI client
openai.api_key = os.environ['OPENAI_API_KEY']
openai.api_base = os.environ['OPENAI_API_BASE']
@@ -200,13 +202,15 @@ def chat_gpt(prompt, max_tokens=1024, max_context_tokens=4000, safety_margin=5,

# Make the right OpenAI call depending on the API we're using
if(os.environ["ELASTIC_PROXY"] == "True"):
response = openai.ChatCompletion.create(model=openai.default_model,
client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)
response = client.chat.completions.create(model=openai.default_model,
temperature=0,
messages=[{"role": "system", "content": sys_content},
{"role": "user", "content": truncated_prompt}]
)
else:
response = openai.ChatCompletion.create(engine=openai.default_model,
client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)
response = client.chat.completions.create(engine=openai.default_model,
temperature=0,
messages=[{"role": "system", "content": sys_content},
{"role": "user", "content": truncated_prompt}]
@@ -216,12 +220,12 @@ def chat_gpt(prompt, max_tokens=1024, max_context_tokens=4000, safety_margin=5,
# APM: add metadata labels of data we want to capture
elasticapm.label(model = openai.default_model)
elasticapm.label(prompt = prompt)
elasticapm.label(total_tokens = response["usage"]["total_tokens"])
elasticapm.label(prompt_tokens = response["usage"]["prompt_tokens"])
elasticapm.label(response_tokens = response["usage"]["completion_tokens"])
elasticapm.label(total_tokens = response.usage.total_tokens)
elasticapm.label(prompt_tokens = response.usage.prompt_tokens)
elasticapm.label(response_tokens = response.usage.completion_tokens)
if 'USER_HASH' in os.environ: elasticapm.label(user = os.environ['USER_HASH'])

return response["choices"][0]["message"]["content"]
return response.choices[0].message.content

def toLLM(resp, url, usr_prompt, sys_prompt, neg_resp, show_prompt):
prompt_template = Template(usr_prompt)
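
The app.py hunk above also switches the APM labels from dict indexing (response["usage"]["total_tokens"]) to attribute access, since openai 1.x returns typed response objects. A short sketch of reading those fields, assuming response came from client.chat.completions.create(...):

# openai>=1.0 responses are pydantic-style objects rather than plain dicts
answer = response.choices[0].message.content
total_tokens = response.usage.total_tokens
prompt_tokens = response.usage.prompt_tokens
completion_tokens = response.usage.completion_tokens

# A dict / JSON view is still available when something downstream expects one
as_dict = response.model_dump()
as_json = response.model_dump_json()
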
