diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt
index a9459a5..0b025e6 100644
--- a/.devcontainer/requirements.txt
+++ b/.devcontainer/requirements.txt
@@ -1,4 +1,3 @@
-# Default libraries required/wanted for Jupyter and Streamlit
 ipywidgets==8.1.1
 matplotlib==3.8.1
 numpy==1.23.5
@@ -8,7 +7,7 @@ torchvision==0.13.1
 tqdm==4.64.0
 streamlit==1.28.1
 #Add Custom worlshop packages below:
-openai==0.28.1
+openai==1.3.9
 elasticsearch==8.11.0
 eland==8.11.0
 transformers==4.35.0
@@ -16,4 +15,6 @@ sentence_transformers==2.2.2
 python-dotenv==1.0.0
 elastic-apm==6.19.0
 inquirer==3.1.3
-sentencepiece==0.1.99
\ No newline at end of file
+sentencepiece==0.1.99
+tiktoken==0.5.2
+cohere==4.38
\ No newline at end of file
diff --git a/notebooks/Session_1.ipynb b/notebooks/Session_1.ipynb
index 9628c6d..0fb81b1 100644
--- a/notebooks/Session_1.ipynb
+++ b/notebooks/Session_1.ipynb
@@ -146,11 +146,10 @@
 "outputs": [],
 "source": [
 "! pip install --upgrade pip\n",
- "! pip install -qqq --no-cache-dir torch\n",
- "! pip install -qqq transformers sentencepiece\n",
- "! pip install -qqq xformers\n",
- "! pip install -qqq python-dotenv\n",
- "! pip install -qqq \"openai<1.0.0\" ## for later in the lab"
+ "! pip install -q --no-cache-dir torch\n",
+ "! pip install -q transformers sentencepiece\n",
+ "! pip install -q python-dotenv\n",
+ "! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9 ## for later in the lab"
 ]
 },
 {
@@ -332,6 +331,9 @@
 "metadata": {},
 "outputs": [],
 "source": [
+ "## Let's play with something a little bigger that can do a text completion\n",
+ "## This is a 3 GB download and takes some RAM to run, but it works CPU only\n",
+ "\n",
 "from transformers import pipeline\n",
 "from transformers import AutoTokenizer, AutoModelForSeq2SeqLM\n",
 "\n",
@@ -371,12 +373,15 @@
 },
 "outputs": [],
 "source": [
- "countries = [\"United Kingdom\",\n",
- " \"France\",\n",
- " \"People's Republic of China\",\n",
- " \"United States\",\n",
- " \"Ecuador\",\n",
- " \"Faketopia\"]\n",
+ "countries = [\n",
+ " \"United Kingdom\",\n",
+ " \"France\",\n",
+ " \"People's Republic of China\",\n",
+ " \"United States\",\n",
+ " \"Ecuador\",\n",
+ " \"Freedonia\", ## high hallucination potential\n",
+ " \"Faketopia\" ## high hallucination potential\n",
+ " ]\n",
 "\n",
 "for country in countries:\n",
 " input_text = f\"The capital of the {country} is\"\n",
@@ -408,7 +413,7 @@
 },
 "outputs": [],
 "source": [
- "prompt_text = \"The current Prime Minister of the united kingdom is \"\n",
+ "prompt_text = \"The current Prime Minister of the united kingdom is \" ## high stale data potential\n",
 "output = llm_pipe(prompt_text)\n",
 "completed_prompt = f\"\\033[94m{prompt_text}\\033[0m {output[0]['generated_text']}\"\n",
 "print(completed_prompt)"
@@ -469,6 +474,7 @@
 "source": [
 "import os, secrets, requests\n",
 "import openai\n",
+ "from openai import OpenAI\n",
 "from requests.auth import HTTPBasicAuth\n",
 "\n",
 "#if using the Elastic AI proxy, then generate the correct API key\n",
@@ -492,27 +498,22 @@
 "\n",
 "# Call the OpenAI ChatCompletion API\n",
 "def chatCompletion(messages):\n",
- " if os.environ[\"ELASTIC_PROXY\"] == \"True\":\n",
- " completion = openai.ChatCompletion.create(\n",
- " model=openai.default_model,\n",
- " max_tokens=100,\n",
- " messages=messages\n",
- " )\n",
- " else:\n",
- " completion = openai.ChatCompletion.create(\n",
- " engine=openai.default_model,\n",
- " max_tokens=100,\n",
- " messages=messages\n",
- " )\n",
+ " client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n",
+ " completion = client.chat.completions.create(\n",
+ " model=openai.default_model,\n",
+ " max_tokens=100,\n",
+ " messages=messages\n",
+ " )\n",
 " return completion\n",
 "\n",
 "def chatWithGPT(prompt, print_full_json=False):\n",
- " response_text = chatCompletion([{\"role\": \"user\", \"content\": prompt}])\n",
+ " completion = chatCompletion([{\"role\": \"user\", \"content\": prompt}])\n",
+ " response_text = completion.choices[0].message.content\n",
 "\n",
 " if print_full_json:\n",
- " json_pretty(response_text)\n",
+ " print(completion.model_dump_json())\n",
 "\n",
- " return wrap_text(response_text.choices[0].message.content,70)\n",
+ " return wrap_text(response_text,70)\n",
 "\n",
 "## call it with the json debug output enabled\n",
 "response = chatWithGPT(\"Hello, is ChatGPT online and working?\", print_full_json=True)\n",
@@ -584,18 +585,15 @@
 "You are an unhelpful AI named Captain LLM_Beard that talks like a pirate in short responses.\n",
 "You acknowledge the user's question but redirect all conversations towards your love of treasure.\n",
 "\"\"\"\n",
- "\n",
- " response_text = chatCompletion(\n",
- " [\n",
+ " completion = chatCompletion([\n",
 " {\"role\": \"system\", \"content\": system_prompt},\n",
 " {\"role\": \"user\", \"content\": prompt}\n",
- " ]\n",
- " )\n",
- "\n",
+ " ])\n",
+ " response_text = completion.choices[0].message.content\n",
 " if print_full_json:\n",
- " json_pretty(response_text)\n",
+ " print(completion.model_dump_json())\n",
 "\n",
- " return wrap_text(response_text.choices[0].message.content,70)\n",
+ " return wrap_text(response_text,70)\n",
 "\n",
 "hold_a_conversation(pirateGPT)"
@@ -701,9 +699,12 @@
 " concatenated_message = [system_prompt] + memory_buffer.peek()\n",
 "\n",
 " ## here is the request to the AI\n",
- " completion = chatCompletion(concatenated_message)\n",
 "\n",
+ " completion = chatCompletion(concatenated_message)\n",
 " response_text = completion.choices[0].message.content\n",
+ " if print_full_json:\n",
+ " print(completion.json())\n",
+ "\n",
 "\n",
 " ## don't forget to add the repsonse to the conversation memory\n",
 " memory_buffer.enqueue({\"role\":\"assistant\", \"content\":response_text})\n",
diff --git a/notebooks/Session_2.ipynb b/notebooks/Session_2.ipynb
index 0b014f0..3167ef7 100644
--- a/notebooks/Session_2.ipynb
+++ b/notebooks/Session_2.ipynb
@@ -45,7 +45,8 @@
 },
 "outputs": [],
 "source": [
- "! pip install -q streamlit \"openai<1.0.0\" elasticsearch elastic-apm inquirer python-dotenv\n",
+ "! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n",
+ "! pip install -q streamlit elasticsearch elastic-apm inquirer python-dotenv\n",
 "\n",
 "import os, inquirer, re, secrets, requests\n",
 "import streamlit as st\n",
diff --git a/streamlit/app.py b/streamlit/app.py
index 5c25456..2fbaf8c 100644
--- a/streamlit/app.py
+++ b/streamlit/app.py
@@ -1,10 +1,12 @@
 import os
 import streamlit as st
 import openai
+from openai import OpenAI
 from elasticsearch import Elasticsearch
 from string import Template
 import elasticapm
+
 # Configure OpenAI client
 openai.api_key = os.environ['OPENAI_API_KEY']
 openai.api_base = os.environ['OPENAI_API_BASE']
@@ -200,13 +202,15 @@ def chat_gpt(prompt, max_tokens=1024, max_context_tokens=4000, safety_margin=5,
 # Make the right OpenAI call depending on the API we're using
 if(os.environ["ELASTIC_PROXY"] == "True"):
- response = openai.ChatCompletion.create(model=openai.default_model,
+ client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)
+ response = client.chat.completions.create(model=openai.default_model,
 temperature=0,
 messages=[{"role": "system", "content": sys_content},
 {"role": "user", "content": truncated_prompt}]
 )
 else:
- response = openai.ChatCompletion.create(engine=openai.default_model,
+ client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)
+ response = client.chat.completions.create(model=openai.default_model,
 temperature=0,
 messages=[{"role": "system", "content": sys_content},
 {"role": "user", "content": truncated_prompt}]
@@ -216,12 +220,12 @@ def chat_gpt(prompt, max_tokens=1024, max_context_tokens=4000, safety_margin=5,
 # APM: add metadata labels of data we want to capture
 elasticapm.label(model = openai.default_model)
 elasticapm.label(prompt = prompt)
- elasticapm.label(total_tokens = response["usage"]["total_tokens"])
- elasticapm.label(prompt_tokens = response["usage"]["prompt_tokens"])
- elasticapm.label(response_tokens = response["usage"]["completion_tokens"])
+ elasticapm.label(total_tokens = response.usage.total_tokens)
+ elasticapm.label(prompt_tokens = response.usage.prompt_tokens)
+ elasticapm.label(response_tokens = response.usage.completion_tokens)
 if 'USER_HASH' in os.environ:
 elasticapm.label(user = os.environ['USER_HASH'])
- return response["choices"][0]["message"]["content"]
+ return response.choices[0].message.content
 
 def toLLM(resp, url, usr_prompt, sys_prompt, neg_resp, show_prompt):
 prompt_template = Template(usr_prompt)