diff --git a/.devcontainer/requirements.txt b/.devcontainer/requirements.txt index 0b025e6..bccee54 100644 --- a/.devcontainer/requirements.txt +++ b/.devcontainer/requirements.txt @@ -2,19 +2,26 @@ ipywidgets==8.1.1 matplotlib==3.8.1 numpy==1.23.5 pandas==1.5.3 -torch==1.12.1 -torchvision==0.13.1 +torch==2.1.0 +torchvision==0.16.0 tqdm==4.64.0 -streamlit==1.28.1 +streamlit==1.30.0 #Add Custom worlshop packages below: openai==1.3.9 -elasticsearch==8.11.0 -eland==8.11.0 +elasticsearch==8.12.0 +eland==8.11.1 transformers==4.35.0 sentence_transformers==2.2.2 python-dotenv==1.0.0 -elastic-apm==6.19.0 -inquirer==3.1.3 +elastic-apm==6.20.0 +inquirer==3.2.1 sentencepiece==0.1.99 tiktoken==0.5.2 -cohere==4.38 \ No newline at end of file +cohere==4.38 +elastic-apm==6.20.0 +langchain==0.1.3 +beautifulsoup4==4.11.2 +scikit-learn==1.2.2 +scipy==1.11.4 +elasticsearch-llm-cache==0.9.5 +git+https://github.com/elastic/notebook-workshop-loader.git@main \ No newline at end of file diff --git a/notebooks/Session_2.ipynb b/notebooks/Session_2.ipynb deleted file mode 100644 index 3167ef7..0000000 --- a/notebooks/Session_2.ipynb +++ /dev/null @@ -1,528 +0,0 @@ -{ - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "nFbQGw2POViM" - }, - "source": [ - "# Lab 2" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup Environment\n", - "The following code loads the environment variables required to run this notebook.\n" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [ - "FILE=\"Session 2\"\n", - "\n", - "! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", - "from notebookworkshoploader import loader\n", - "import os\n", - "from dotenv import load_dotenv\n", - "\n", - "if os.path.isfile(\"../env\"):\n", - " load_dotenv(\"../env\", override=True)\n", - " print('Successfully loaded environment variables from local env file')\n", - "else:\n", - " loader.load_remote_env(file=FILE, env_url=\"https://notebook-workshop-api-voldmqr2bq-uc.a.run.app\")" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Ln-8SRvAI-jS" - }, - "outputs": [], - "source": [ - "! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n", - "! 
pip install -q streamlit elasticsearch elastic-apm inquirer python-dotenv\n", - "\n", - "import os, inquirer, re, secrets, requests\n", - "import streamlit as st\n", - "import openai\n", - "\n", - "from IPython.display import display\n", - "from ipywidgets import widgets\n", - "from pprint import pprint\n", - "from elasticsearch import Elasticsearch\n", - "from string import Template\n", - "from requests.auth import HTTPBasicAuth\n", - "\n", - "#if using the Elastic AI proxy, then generate the correct API key\n", - "if os.environ['ELASTIC_PROXY'] == \"True\":\n", - "\n", - " #remove the api type variable: it's a must when using the proxy\n", - " if \"OPENAI_API_TYPE\" in os.environ: del os.environ[\"OPENAI_API_TYPE\"]\n", - "\n", - " #generate and share \"your\" unique hash\n", - " os.environ['USER_HASH'] = secrets.token_hex(nbytes=6)\n", - " print(f\"Your unique user hash is: {os.environ['USER_HASH']}\")\n", - "\n", - " #get the current API key and combine with your hash\n", - " os.environ['OPENAI_API_KEY'] = f\"{os.environ['OPENAI_API_KEY']} {os.environ['USER_HASH']}\"\n" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "wEOV5KlycEmV" - }, - "source": [ - "## Create Elasticsearch client connection" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "AUIB9n9ccGAR" - }, - "outputs": [], - "source": [ - "if 'ELASTIC_CLOUD_ID' in os.environ:\n", - " es = Elasticsearch(\n", - " cloud_id=os.environ['ELASTIC_CLOUD_ID'],\n", - " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", - " request_timeout=30\n", - " )\n", - "elif 'ELASTIC_URL' in os.environ:\n", - " es = Elasticsearch(\n", - " os.environ['ELASTIC_URL'],\n", - " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", - " request_timeout=30\n", - " )\n", - "else:\n", - " print(\"env needs to set either ELASTIC_CLOUD_ID or ELASTIC_URL\")" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "E0uQujqZclf0" - }, - "source": [ - "# Lab 2-1\n", - "- Chunking (simplified example)\n", - "- Generating embeddings\n", - "- Perform kNN search" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "fQQ-GAcOecAE" - }, - "source": [ - "## Chunking\n", - "Simplfied example" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "RCArFD3kecTU" - }, - "outputs": [], - "source": [ - "body_content = \"Elastic Docs › Elasticsearch Guide [8.8] « Searchable snapshots Elasticsearch security principles » Secure the Elastic Stack edit The Elastic Stack is comprised of many moving parts. There are the Elasticsearch nodes that form the cluster, plus Logstash instances, Kibana instances, Beats agents, and clients all communicating with the cluster. To keep your cluster safe, adhere to the Elasticsearch security principles . The first principle is to run Elasticsearch with security enabled. Configuring security can be complicated, so we made it easy to start the Elastic Stack with security enabled and configured . For any new clusters, just start Elasticsearch to automatically enable password protection, secure internode communication with Transport Layer Security (TLS), and encrypt connections between Elasticsearch and Kibana. If you have an existing, unsecured cluster (or prefer to manage security on your own), you can manually enable and configure security to secure Elasticsearch clusters and any clients that communicate with your clusters. 
You can also implement additional security measures, such as role-based access control, IP filtering, and auditing. Enabling security protects Elasticsearch clusters by: Preventing unauthorized access with password protection, role-based access control, and IP filtering. Preserving the integrity of your data with SSL/TLS encryption. Maintaining an audit trail so you know who’s doing what to your cluster and the data it stores. If you plan to run Elasticsearch in a Federal Information Processing Standard (FIPS) 140-2 enabled JVM, see FIPS 140-2 . Preventing unauthorized access edit To prevent unauthorized access to your Elasticsearch cluster, you need a way to authenticate users in order to validate that a user is who they claim to be. For example, making sure that only the person named Kelsey Andorra can sign in as the user kandorra . The Elasticsearch security features provide a standalone authentication mechanism that enables you to quickly password-protect your cluster. If you’re already using LDAP, Active Directory, or PKI to manage users in your organization, the security features integrate with those systems to perform user authentication. In many cases, authenticating users isn’t enough. You also need a way to control what data users can access and what tasks they can perform. By enabling the Elasticsearch security features, you can authorize users by assigning access privileges to roles and assigning those roles to users. Using this role-based access control mechanism (RBAC), you can limit the user kandorra to only perform read operations on the events index restrict access to all other indices. The security features also enable you to restrict the nodes and clients that can connect to the cluster based on IP filters . You can block and allow specific IP addresses, subnets, or DNS domains to control network-level access to a cluster. See User authentication and User authorization . Preserving data integrity and confidentiality edit A critical part of security is keeping confidential data secured. Elasticsearch has built-in protections against accidental data loss and corruption. However, there’s nothing to stop deliberate tampering or data interception. The Elastic Stack security features use TLS to preserve the integrity of your data against tampering, while also providing confidentiality by encrypting communications to, from, and within the cluster. For even greater protection, you can increase the encryption strength . See Configure security for the Elastic Stack . Maintaining an audit trail edit Keeping a system secure takes vigilance. By using Elastic Stack security features to maintain an audit trail, you can easily see who is accessing your cluster and what they’re doing. You can configure the audit level, which accounts for the type of events that are logged. These events include failed authentication attempts, user access denied, node connection denied, and more. By analyzing access patterns and failed attempts to access your cluster, you can gain insights into attempted attacks and data breaches. Keeping an auditable log of the activity in your cluster can also help diagnose operational issues. See Enable audit logging . 
« Searchable snapshots Elasticsearch security principles » Most Popular Video Get Started with Elasticsearch Video Intro to Kibana Video ELK for Logs & Metrics\"\n", - "print (\"The length of the paragraph is %s characters\" % len (body_content))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "-NGORlTWncWt" - }, - "source": [ - "There are many ways to split text. We can split on individual characters, spaces, at a set length, using a library like langchain, or using a tokenizer, to name a few ways.\n", - "\n", - "For this simple example we are going to split on dot+space \". \", essentially spliting individual sentences." - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "sVu0PcLzolSX" - }, - "outputs": [], - "source": [ - "chunked_content = [chunk for chunk in re.split('\\. ', body_content)]\n", - "chunk = chunked_content[0] # We'll use this later\n", - "print (\"There are now %s sentence chunks.\\nThe first element is:'%s'\" % (len(chunked_content), chunk))" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "D7NHs6s2qYii" - }, - "source": [ - "TODO Talk about tokens" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "UwAOqksbrOUv" - }, - "outputs": [], - "source": [ - "# Show the \"tokens\" from the first chunk\n", - "chunk.split()" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "4DL2MkCprTBe" - }, - "source": [ - "## Generate embeddings" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "i3xI7Rg8rYOL" - }, - "source": [ - "We need to pass our text to an embedding model to generate vectors.\n", - "\n", - "Models have pre-definied token limits which restrict the amount of text (tokens really) that can be processed into vectors." 
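Since the embedding model used in the next cells accepts only a limited number of tokens (512 for the MiniLM model referenced later in this workshop), it can be useful to count tokens per chunk before requesting embeddings. A minimal sketch, assuming the HuggingFace checkpoint name that corresponds to that model and the `chunked_content` list built above:

```python
from transformers import AutoTokenizer

# Assumed HuggingFace checkpoint name for the MiniLM embedding model used below;
# 512 is the token limit noted for this model elsewhere in the workshop.
hf_model_name = "sentence-transformers/msmarco-MiniLM-L-12-v3"
tokenizer = AutoTokenizer.from_pretrained(hf_model_name)
max_tokens = 512

# Flag any sentence chunk that would be truncated by the model
for i, sentence_chunk in enumerate(chunked_content):
    n_tokens = len(tokenizer.encode(sentence_chunk))
    status = "over limit" if n_tokens > max_tokens else "ok"
    print(f"chunk {i}: {n_tokens} tokens ({status})")
```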
- ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Co_jW0DevAUY" - }, - "outputs": [], - "source": [ - "es_model_id = 'sentence-transformers__msmarco-minilm-l-12-v3'" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "lXVW_fOpsr0H" - }, - "outputs": [], - "source": [ - "chunk" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "K8MOh-2Fqbr1" - }, - "outputs": [], - "source": [ - "docs = [\n", - " {\n", - " \"text_field\": chunk\n", - " }\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Q-n1lTvAvVht" - }, - "outputs": [], - "source": [ - "chunk_vector = es.ml.infer_trained_model(model_id=es_model_id, docs=docs, )" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "1XHbhuX4v3ky" - }, - "outputs": [], - "source": [ - "vector_doc = {\n", - " \"_index\": \"chunker\",\n", - " \"_id\": \"64837860d86b1293a9a5f620-0\",\n", - " \"_source\": {\n", - " \"chunk\" : chunk,\n", - " \"chunk-vector\" : chunk_vector['inference_results'][0]['predicted_value'],\n", - " \"body_content\" : body_content\n", - " }\n", - "}\n", - "\n", - "pprint(vector_doc)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "LEK9CFy9w-6d" - }, - "source": [ - "## Exceeding the model's token limit" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Ndjrwim0XS-m" - }, - "source": [ - "Let's take a look at what happens when we exceed the model's token limit" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "ZVIaFOG-yaKJ" - }, - "outputs": [], - "source": [ - "full_paragraph = [\n", - " {\n", - " \"text_field\": body_content\n", - " }\n", - "]" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "6msjDqmZyaKJ" - }, - "outputs": [], - "source": [ - "chunk_vector = es.ml.infer_trained_model(model_id=es_model_id, docs=full_paragraph, )\n", - "print(\"When the token size exceeds the model's max token limit, the value of `is_truncated` will return True\")\n", - "print('We exceeded the model token limit: %s' % chunk_vector['inference_results'][0]['is_truncated'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "qYMM629tXYF8" - }, - "source": [ - "We see that the model still processed the tokens up to it's limit, then simply truncated (ignored) any tokens longer than that.\n", - "\n", - "Elasticsearch returns a `is_truncated : True` key:value to let you know the embedding returned is not for the full text." 
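A common way to handle a truncated result is to fall back to the smaller chunks created earlier so that every part of the text contributes an embedding. A minimal sketch, reusing the `es` client, `es_model_id`, `body_content`, and `chunked_content` defined in the cells above:

```python
# Sketch only: embed sentence-level chunks when the full passage is truncated.
result = es.ml.infer_trained_model(
    model_id=es_model_id, docs=[{"text_field": body_content}]
)

if result["inference_results"][0]["is_truncated"]:
    chunk_vectors = [
        es.ml.infer_trained_model(
            model_id=es_model_id, docs=[{"text_field": c}]
        )["inference_results"][0]["predicted_value"]
        for c in chunked_content
    ]
    print(f"Embedded {len(chunk_vectors)} chunks instead of one truncated passage")
```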
- ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tIbhhCa4oKPO" - }, - "source": [ - "## Querying with hybrid vector search" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "cFNyCWCWp_nI" - }, - "source": [ - "We will run through an example of searching with approximate kNN vector search combined with BM25 text search combing the results with rrf.\n", - "\n", - "This is the type of query that will power the UI we will use in lab 2-2" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "yVqwwfskoKqP" - }, - "outputs": [], - "source": [ - "def search_with_knn(query_text, es):\n", - " # Elasticsearch query (BM25) and kNN configuration for rrf hybrid search\n", - "\n", - " query = {\n", - " \"bool\": {\n", - " \"must\": [{\n", - " \"match\": {\n", - " \"body_content\": {\n", - " \"query\": query_text\n", - " }\n", - " }\n", - " }],\n", - " \"filter\": [{\n", - " \"term\": {\n", - " \"url_path_dir3\": \"elasticsearch\"\n", - " }\n", - " }]\n", - " }\n", - " }\n", - "\n", - " knn = [\n", - " {\n", - " \"field\": \"chunk-vector\",\n", - " \"k\": 10,\n", - " \"num_candidates\": 10,\n", - " \"filter\": {\n", - " \"bool\": {\n", - " \"filter\": [\n", - " {\n", - " \"range\": {\n", - " \"chunklength\": {\n", - " \"gte\": 0\n", - " }\n", - " }\n", - " },\n", - " {\n", - " \"term\": {\n", - " \"url_path_dir3\": \"elasticsearch\"\n", - " }\n", - " }\n", - " ]\n", - " }\n", - " },\n", - " \"query_vector_builder\": {\n", - " \"text_embedding\": {\n", - " \"model_id\": \"sentence-transformers__msmarco-minilm-l-12-v3\",\n", - " \"model_text\": query_text\n", - " }\n", - " }\n", - " }\n", - " ]\n", - "\n", - " rank = {\n", - " \"rrf\": {\n", - " }\n", - " }\n", - "\n", - " fields= [\n", - " \"title\",\n", - " \"url\",\n", - " \"body_content\"\n", - " ]\n", - "\n", - " resp = es.search(index=os.environ['ELASTIC_INDEX_DOCS'],\n", - " query=query,\n", - " knn=knn,\n", - " rank=rank,\n", - " fields=fields,\n", - " size=1,\n", - " source=False)\n", - "\n", - " return resp\n", - "\n", - "query = 'How do I start Elastic with Security Enabled?'\n", - "response = search_with_knn(query, es)\n", - "pprint(response['hits'])" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "tCsxLt8Yc2LI" - }, - "source": [ - "# Lab 2-2\n", - "RAG" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "ELRm5s9pc5Jg" - }, - "source": [ - "## Verify our Elasticsearch connection is still active\n", - "If you receive an error, rerun the cells in the Setup section above" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "Isssrm07dKMe" - }, - "outputs": [], - "source": [ - "print(es.info()['tagline']) # should return cluster info" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "t7RmurdZNPg-" - }, - "source": [ - "## Main Script\n", - "We've placed the sample code in the streamlit folder of this repository\n", - "\n", - "Take a look at the code [streamlit/app.py](../streamlit/app.py)" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "Wu0KfS0ESf6e" - }, - "source": [ - "## Streamlit\n", - "To start the Streamlit app you need to use the ```streamlit run``` command from the folder. You can do this either from this notebook or the Visual Studio Code terminal provided in Github Codespaces" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "cHIHFID3NBXa" - }, - "outputs": [], - "source": [ - "! 
cd ../streamlit; streamlit run app.py " - ] - } - ], - "metadata": { - "colab": { - "provenance": [] - }, - "kernelspec": { - "display_name": "Python 3", - "name": "python3" - }, - "language_info": { - "codemirror_mode": { - "name": "ipython", - "version": 3 - }, - "file_extension": ".py", - "mimetype": "text/x-python", - "name": "python", - "nbconvert_exporter": "python", - "pygments_lexer": "ipython3", - "version": "3.10.8" - } - }, - "nbformat": 4, - "nbformat_minor": 0 -} diff --git a/notebooks/Session_1.ipynb b/notebooks/genai_colab_lab1and2.ipynb similarity index 69% rename from notebooks/Session_1.ipynb rename to notebooks/genai_colab_lab1and2.ipynb index 0fb81b1..49c8f73 100644 --- a/notebooks/Session_1.ipynb +++ b/notebooks/genai_colab_lab1and2.ipynb @@ -3,45 +3,24 @@ { "cell_type": "markdown", "metadata": { - "id": "YeTXscrkNOF1" + "id": "EEeUZ1r79kr0" }, "source": [ - "# [Session 1] Introduction to Generative AI\n", - "\n", - "This notebook includes *all* the labs of Session 1. They are meant to be run in order. If you finish early, try playing around with the LLMs loaded to that point.\n", - "\n", - "## Tips on using Jupyter Notebooks\n", - "\n", - "* Notebooks are just .ipynb files, you can run them locally in any python dev environment if you'd like. We are running in Github Codespaces to keep things simple (and well tested) for this course.\n", - "\n", - "* In Github Codespaces, each notebook has its own independent execution environment.\n", - " * You can see the file system in the left hand icon tray, just like Visual Studio Code\n", - "\n", - "* Notebooks have Markdown and Code snippets\n", - " * You can access the shell of the coding environment with a ```!``` command\n", - " * Run each code sample in order. Notebooks will usually import libraries they need as the workshop progresses\n", - "\n", - "* You can always restart your code environment and rerun if you get into trouble\n", - " * in-memory variables are lost on a **restart**\n" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "## Setup Environment\n", + "# Setup Environment\n", "The following code loads the environment variables required to run this notebook.\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "iqND-S5P9kr0" + }, "outputs": [], "source": [ - "FILE=\"Session 1\"\n", + "FILE=\"GenAI Lab 1 and 2\"\n", "\n", - "! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", + "# ! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", "from notebookworkshoploader import loader\n", "import os\n", "from dotenv import load_dotenv\n", @@ -59,7 +38,7 @@ "id": "Sm8uPLZxNOF3" }, "source": [ - "## Lab 1-1: Introduction and Transformer Models\n", + "# Lab 1-1: Using Transformer Models\n", "\n", "In this lab we will\n", "* Intro to Python Notebooks - Hello World, importing python libraries\n", @@ -68,22 +47,13 @@ "\n" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "j9xHoMleO_xz" - }, - "source": [ - "### Notebook Hello World" - ] - }, { "cell_type": "markdown", "metadata": { "id": "HqlXsCGMN6TC" }, "source": [ - "#### Step 1: Hit play on the next code **sample**" + "## Step 1: Hit play on the next code **sample**" ] }, { @@ -103,7 +73,7 @@ "id": "P85HsyKmNOF4" }, "source": [ - "#### Step 2: Use ! to execute a shell command" + "## Step 2: Use ! to execute a shell command" ] }, { @@ -117,22 +87,13 @@ "! 
echo \"The shell thinks the Current Directory is: $(pwd)\"" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "mZ_VBiFPPIM5" - }, - "source": [ - "### Getting some python dependencies" - ] - }, { "cell_type": "markdown", "metadata": { "id": "lSXuTJelNOF5" }, "source": [ - "#### Step 3: Environment setup\n", + "## Step 3: Environment setup\n", "\n", "First let us import some Python libraries we'll use in the first lab module." ] @@ -145,11 +106,12 @@ }, "outputs": [], "source": [ - "! pip install --upgrade pip\n", - "! pip install -q --no-cache-dir torch\n", - "! pip install -q transformers sentencepiece\n", - "! pip install -q python-dotenv\n", - "! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9 ## for later in the lab" + "! ## pip install -qqq --upgrade pip\n", + "# ! pip install -qqq torch==2.1.0\n", + "# ! pip install -qqq --upgrade transformers==4.36.2\n", + "# ! pip install -qqq python-dotenv==1.0.0\n", + "# ! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9 ## for later in the lab\n", + "! echo \"github codespaces has pre-installed these libraries\"" ] }, { @@ -158,7 +120,7 @@ "id": "Kk2GXx6ANOF5" }, "source": [ - "#### Step 4: Utility functions\n", + "## Step 4: Utility functions\n", "Some utility functions that are good to keep on hand" ] }, @@ -184,22 +146,13 @@ "\n" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "IZalQ_D_PXCg" - }, - "source": [ - "### Downloading, Caching, and Prepping a Model" - ] - }, { "cell_type": "markdown", "metadata": { "id": "ZIPsPhYuNOF5" }, "source": [ - "#### Step 5: Download sentiment analysis model\n", + "## Step 5: Download sentiment analysis model from HuggingFace\n", "\n", "We'll use the Huggingface Transformer library to download and ready an Open Source model called DistilBERT which can be used for sentiment analysis.\n", "\n", @@ -245,9 +198,9 @@ "id": "1N1DeQ4SNOF5" }, "source": [ - "Okay! let's run the model ```sentiment_model``` on two pieces of sample text.\n", "\n", - "#### Step 6: Run sentiment analysis" + "## Step 6: Run sentiment analysis\n", + "Okay! let's run the model ```sentiment_model``` on two pieces of sample text." ] }, { @@ -303,32 +256,26 @@ "json_pretty(your_classifier_results)" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "1F5ytSTdPoCo" - }, - "source": [ - "### Generative LLM - Simple and Local" - ] - }, { "cell_type": "markdown", "metadata": { "id": "n5f1vIeeNOF6" }, "source": [ - "#### Step 7: Download Flan T5\n", + "## Step 7: Generative LLM - Simple and Local - Download Flan T5\n", "\n", - "Let's start with the Hello World of generative AI examples: completing a sentence. For this we'll install A fine tuned variant of the Flan-T5 model.\n", + "Let's start with the Hello World of generative AI examples: completing a sentence. For this we'll install a fine tuned Flan-T5 variant model. ([LaMini-T5 ](https://huggingface.co/MBZUAI/LaMini-T5-738M))\n", "\n", - "Note, while this is a smaller checkpoint of the model, it is still a 3GB download. We'll cache the files in the same folder." + "Note, while this is a smaller checkpoint of the model, it is still a 3GB download. 
We'll cache the files in the same folder.\n", + "\n" ] }, { "cell_type": "code", "execution_count": null, - "metadata": {}, + "metadata": { + "id": "l6kqG5siNOF6" + }, "outputs": [], "source": [ "## Let's play with something a little bigger that can do a text completion\n", @@ -341,11 +288,9 @@ "# model_name = \"MBZUAI/LaMini-T5-223M\"\n", "model_name = \"MBZUAI/LaMini-T5-738M\"\n", "\n", - "cache_directory = \"llm_download_cache\"\n", - "\n", - "llm_tokenizer = AutoTokenizer.from_pretrained(model_name, \n", + "llm_tokenizer = AutoTokenizer.from_pretrained(model_name,\n", " cache_dir=cache_directory)\n", - "llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name, \n", + "llm_model = AutoModelForSeq2SeqLM.from_pretrained(model_name,\n", " cache_dir=cache_directory)\n", "\n", "llm_pipe = pipeline(\n", @@ -353,7 +298,7 @@ " model=llm_model,\n", " tokenizer=llm_tokenizer,\n", " max_length=100\n", - " )" + " )\n" ] }, { @@ -362,7 +307,7 @@ "id": "RSCILNELNOF6" }, "source": [ - "#### Step 8: Generate text completions" + "## Step 8: Generate text completions, watch for Hallucinations" ] }, { @@ -413,7 +358,7 @@ }, "outputs": [], "source": [ - "prompt_text = \"The current Prime Minister of the united kingdom is \" ## high stale data potential\n", + "prompt_text = \"The current Prime Minister of the United Kingdom is\" ## high stale data potential\n", "output = llm_pipe(prompt_text)\n", "completed_prompt = f\"\\033[94m{prompt_text}\\033[0m {output[0]['generated_text']}\"\n", "print(completed_prompt)" @@ -437,31 +382,22 @@ "id": "_y2q5sxMNOF7" }, "source": [ - "## Lab 1-2: Prompts and Context Windows\n", + "# Lab 2-1: Prompts and Basic Chatbots\n", "\n", "* Using langchain with local LLM\n", "* Connect to Open AI\n", "* Using a memory window to create a txt-only GPT conversation" ] }, - { - "cell_type": "markdown", - "metadata": { - "id": "m23fz0EhQEGz" - }, - "source": [ - "### Basic chat completion" - ] - }, { "cell_type": "markdown", "metadata": { "id": "cRBAXFBiNOF7" }, "source": [ - "#### Step 1: Using the OpenAI python library\n", + "## Step 1: Using the OpenAI python library\n", "\n", - "❗ Note: if you restarted your Notebook, you may need to re-run the first stup step back and the very top before coming back here ❗" + "❗ Note: if you restarted your google Colab, you may need to re-run the first stup step back and the very top before coming back here ❗" ] }, { @@ -494,8 +430,26 @@ "\n", "openai.api_key = os.environ['OPENAI_API_KEY']\n", "openai.api_base = os.environ['OPENAI_API_BASE']\n", - "openai.default_model = os.environ['OPENAI_API_ENGINE']\n", - "\n", + "openai.default_model = os.environ['OPENAI_API_ENGINE']" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "qpO5xbubDz_T" + }, + "source": [ + "## Step 2: Test call to ChatGPT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "ETYE5zfSD2J7" + }, + "outputs": [], + "source": [ "# Call the OpenAI ChatCompletion API\n", "def chatCompletion(messages):\n", " client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", @@ -511,7 +465,7 @@ " response_text = completion.choices[0].message.content\n", "\n", " if print_full_json:\n", - " print(completion.model_dump_json())\n", + " print(completion.json())\n", "\n", " return wrap_text(response_text,70)\n", "\n", @@ -528,8 +482,9 @@ "id": "WTyQ4_SFNOF7" }, "source": [ - "Feeding user input in for single questions is easy\n", - "#### Step 2: A conversation loop - ❗ type \"exit\" to end the chat ❗" + "\n", + "## Step 3: A conversation 
loop - ❗ type \"exit\" to end the chat ❗\n", + "Feeding user input in for single questions is easy" ] }, { @@ -540,21 +495,15 @@ }, "outputs": [], "source": [ - "## utility function to print in a different color for debug output\n", - "def print_light_blue(text):\n", - " print(f'\\033[94m{text}\\033[0m')\n", - "\n", "def hold_a_conversation(ai_conversation_function = chatWithGPT):\n", - " print_light_blue(\" -- Have a conversation with an AI: \")\n", - " print_light_blue(\" -- type 'exit' when done\")\n", + " print(\" -- Have a conversation with an AI: \")\n", + " print(\" -- type 'exit' when done\")\n", "\n", - " user_input = input(\"Enter your chat > \")\n", + " user_input = input(\"> \")\n", " while not user_input.lower().startswith(\"exit\"):\n", - " print_light_blue(f\"> {user_input}\")\n", - " ai_response = ai_conversation_function(user_input, False)\n", - " print(ai_response)\n", - " print_light_blue(\" -- type 'exit' when done\")\n", - " user_input = input(\"Enter your chat > \")\n", + " print(ai_conversation_function(user_input, False))\n", + " print(\" -- type 'exit' when done\")\n", + " user_input = input(\"> \")\n", " print(\"\\n -- end conversation --\")\n", "\n", "## we are passing the previously defined function as a parameter\n", @@ -567,9 +516,10 @@ "id": "PdjzXPl6NOF7" }, "source": [ - "You can use the system prompt to adjust the AI and it's responses and purpose\n", "\n", - "#### Step 3: See the impact of changing the system prompt" + "\n", + "## Step 4: See the impact of changing the system prompt\n", + "You can use the system prompt to adjust the AI and it's responses and purpose" ] }, { @@ -591,10 +541,11 @@ " ])\n", " response_text = completion.choices[0].message.content\n", " if print_full_json:\n", - " print(completion.model_dump_json())\n", + " print(completion.json())\n", "\n", " return wrap_text(response_text,70)\n", "\n", + "\n", "hold_a_conversation(pirateGPT)" ] }, @@ -604,7 +555,7 @@ "id": "HHiuOMCeNOF8" }, "source": [ - "### Giving the AI conversation memory\n", + "❗ Note ❗\n", "\n", "This isn't a conversation yet because the AI has no memory of past interactions.\n", "\n", @@ -618,6 +569,8 @@ "I'm sorry, but as an AI, I don't have the ability to know personal\n", "preferences or favorite colors.\n", "```\n", + "There are two problems. First, the LLM is stateless and each call is independent. ChatGPT does not remember our previous prompts. Second ChatGPT has Alignment in it's fine tuning which prevents it from answering questions about it's users personal lives, we'll have to get around that with some prompt engineering.\n", + "\n", "Let's use the past conversation as input to subsequent calls. Because the context window is limited AND tokens cost money (if you are using a hosted service like OpenAI) or CPU cycles if you are self-hosting, we need to have a maximum queue size of only remembering things 2 prompts ago (4 total messages)" ] }, @@ -627,7 +580,7 @@ "id": "XlbDf77SP3-n" }, "source": [ - "#### Step 4: Create a chat with memory" + "## Step 5: Create a chat with memory" ] }, { @@ -672,7 +625,7 @@ " \"role\": \"system\",\n", " \"content\": \"\"\"\n", "You are an AI named Cher Horowitz that speaks\n", - "in 1990's valley girl dialect of English. 
\n", + "in 1990's valley girl dialect of English.\n", "You talk to the human and use the past conversation to inform your answers.\"\"\"\n", " }\n", "\n", @@ -753,16 +706,7 @@ "id": "92luYkkb5aaX" }, "source": [ - "## Lab 1-3: Data Redaction" - ] - }, - { - "cell_type": "markdown", - "metadata": { - "id": "YJH4Qofr5mm4" - }, - "source": [ - "### Model import" + "# Lab 2-2: Data Redaction" ] }, { @@ -771,7 +715,7 @@ "id": "CKKAZragT9ec" }, "source": [ - "#### Step 1: Install and Import dependencies" + "## Step 1: Install and Import dependencies" ] }, { @@ -782,7 +726,9 @@ }, "outputs": [], "source": [ - "# ! pip install -q eland elasticsearch transformers sentence_transformers python-dotenv\n", + "# ! pip install -qqq eland==8.11.1 elasticsearch==8.12.0 transformers==4.35.0 sentence-transformers==2.2.2 python-dotenv==1.0.0\n", + "# ! pip install -qqq elastic-apm==6.20.0\n", + "! echo \"github codespaces has pre-installed these libraries\"\n", "\n", "from elasticsearch import Elasticsearch, helpers, exceptions\n", "from eland.ml.pytorch import PyTorchModel\n", @@ -799,7 +745,7 @@ "id": "JXj9kw_5UK_V" }, "source": [ - "#### Step 2: Create Elasticsearch Client Connection" + "## Step 2: Create Elasticsearch Client Connection" ] }, { @@ -829,58 +775,283 @@ { "cell_type": "markdown", "metadata": { - "id": "nuiUxbCkYS3b" + "id": "G9FWxKtef3uu" }, "source": [ - "#### Step 3: Define the model import function" + "## Step 3: Monitoring prompts sent through a Proxy\n", + "\n", + "Imagine I have the following question from a customer after a winter storm\n", + "\n", + "> My power was out all last week at my home at 123 Grove street.\n", + "When I talked to my neighbor Jane Lopez, she said she got rebate on her bill.\n", + "Can you do the same for me?\n", + "\n", + "The following is a simulated customer example where we'll use the LLM to answer a customer service case.\n", + "\n", + "We'll learn how to **retrieve** the best call script using semantic search in a later exercise. \n", + "\n", + "**Some organizations would be uncomfortable with customer PII going to a 3rd party service. 
Who gets an unencrypted version of the prompt?**" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "yn7sULLEYW6w" + "id": "ZaucVWDddOuR" }, "outputs": [], "source": [ - "def load_model(model_id, task_type):\n", - " with tempfile.TemporaryDirectory() as tmp_dir:\n", - " print(f\"Loading HuggingFace transformer tokenizer and model [{model_id}] for task [{task_type}]\" )\n", + "import elasticapm\n", + "import random\n", "\n", - " tm = TransformerModel(model_id=model_id, task_type=task_type)\n", - " model_path, config, vocab_path = tm.save(tmp_dir)\n", + "os.environ['ELASTIC_APM_SERVICE_NAME'] = \"genai_workshop_lab_redact\"\n", + "apmclient = elasticapm.Client() \\\n", + " if elasticapm.get_client() is None \\\n", + " else elasticapm.get_client()\n", "\n", - " ptm = PyTorchModel(es, tm.elasticsearch_model_id())\n", - " model_exists = es.options(ignore_status=404).ml.get_trained_models(model_id=ptm.model_id).meta.status == 200\n", + "customer_id = 123\n", "\n", - " if model_exists:\n", - " print(\"Model has already been imported\")\n", - " else:\n", - " print(\"Importing model\")\n", - " ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config)\n", - " print(\"Starting model deployment\")\n", - " ptm.start()\n", - " print(f\"Model successfully imported with id '{ptm.model_id}'\")" + "first_names = [\"Alice\", \"Bob\", \"Charlie\", \"Diana\", \"Edward\",\n", + " \"Fiona\", \"George\", \"Hannah\", \"Ian\", \"Julia\"]\n", + "last_names = [\"Smith\", \"Johnson\", \"Williams\", \"Brown\", \"Jones\",\n", + " \"Garcia\", \"Miller\", \"Davis\", \"Rodriguez\", \"Martinez\"]\n", + "\n", + "# Function to generate a random full name\n", + "def generate_random_name():\n", + " first_name = random.choice(first_names)\n", + " last_name = random.choice(last_names)\n", + " return f\"{first_name} {last_name}\"\n", + "\n", + "\n", + "customer_question = f\"\"\"My power was out all last week at my home on Grove street.\n", + "When I talked to my neighbor {generate_random_name()},\n", + "they said they got rebate on their bill. Can you do the same for me?\"\"\"\n", + "\n", + "retrieved_best_answer = \"\"\"We are currently offering a $100 rebate for\n", + "customers affected by the recent winter storm. If our records show the\n", + "customer was impacted, tell them they can look forward to a $100 credit on their\n", + "next monthly bill. 
If the customer believes they were impacted but our records\n", + "don't show this fact, let them know we'll be escalating their case and they\n", + "should expect a call within 24 hours.\"\"\"\n", + "\n", + "\n", + "import time\n", + "def random_service_time(shorter, longer):\n", + " sleep_time = random.uniform(shorter, longer)\n", + " time.sleep(sleep_time)\n", + "\n", + "def days_impacted_check(customer_id):\n", + " apmclient.begin_transaction(\"impact_check\")\n", + " ## simulated sevice call delay (some parts of the lab LLM are cached)\n", + " random_service_time(0.1,0.3)\n", + " days = 5 ## simulated result of a back end service call\n", + " apmclient.end_transaction(\"impact_check\", \"success\")\n", + " if days > 0 :\n", + " return f\"the customer was impacted by the winter storm for {days} serice days\"\n", + " else:\n", + " return \"the customer was not impacted byt he winter storm\"\n", + "\n", + "\n", + "system_prompt = f\"\"\"\n", + "You are an AI customer support agent for a electric power utility company that\n", + "You use the following retrieved approved call script and customer fact\n", + "to answer the customer's question and try to retain them as a customer.\n", + "\n", + "Call script: {retrieved_best_answer}\n", + "\n", + "Our records: {days_impacted_check(customer_id)}\n", + "\"\"\"\n", + "\n", + "def print_light_blue(text):\n", + " print(f'\\033[94m{text}\\033[0m')\n", + "\n", + "def chatCompletion(messages):\n", + "\n", + " client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", + " completion = client.chat.completions.create(\n", + " model=openai.default_model,\n", + " max_tokens=150,\n", + " messages=messages\n", + " )\n", + "\n", + " return completion\n", + "\n", + "def chatWithPowerAgent(prompt):\n", + " apmclient.begin_transaction(\"llm_call\")\n", + "\n", + " elasticapm.label(prompt = prompt)\n", + "\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " print_light_blue(\"Prompt:\")\n", + " print_light_blue(wrap_text(messages[0][\"content\"],70))\n", + " print_light_blue(wrap_text(messages[1][\"content\"],70))\n", + " completion = chatCompletion(messages)\n", + "\n", + " response_text = completion.choices[0].message.content\n", + "\n", + " apmclient.end_transaction(\"llm_call\", \"success\")\n", + "\n", + " return wrap_text(response_text,70)\n", + "\n", + "\n", + "customer_service_response = chatWithPowerAgent(customer_question)\n", + "\n", + "print(\"Customer Service Response:\")\n", + "print(customer_service_response)\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "ILLdLhQzsKY0" + }, + "source": [ + "## Step 4: Redacting unstructured data with NER Transformer Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "VsLcvOMGT8Z1" + }, + "outputs": [], + "source": [ + "from transformers import AutoTokenizer, AutoModelForTokenClassification\n", + "from transformers import pipeline\n", + "import json\n", + "# pretty printing JSON objects\n", + "def json_pretty(input_object):\n", + " print(json.dumps(input_object, indent=1))\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(\"dslim/bert-base-NER\")\n", + "model = AutoModelForTokenClassification.from_pretrained(\"dslim/bert-base-NER\")\n", + "nlp = pipeline(\"ner\", model=model, tokenizer=tokenizer)\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "wJNzYcnJV0f3" + }, + "outputs": [], + "source": 
[ + "ner_results = nlp(customer_question)\n", + "print(ner_results)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "-lD1hCCpGs7H" + }, + "source": [ + "### Step 5: Let's make an easy to use Redaction Function" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "4ALjsXOCWvId" + }, + "outputs": [], + "source": [ + "\n", + "def redact_named_entities(text):\n", + " apmclient.begin_transaction(\"redaction_local\")\n", + " # Perform named entity recognition on the text\n", + " entities = nlp(text)\n", + "\n", + " # Sort entities by their start index in reverse order\n", + " entities = sorted(entities, key=lambda x: x['start'], reverse=True)\n", + "\n", + " # Iterate over entities and replace them in the text\n", + " for entity in entities:\n", + " ent_type = entity['entity']\n", + " start = entity['start']\n", + " end = entity['end']\n", + " text = text[:start] + \"\" + text[end:]\n", + "\n", + "\n", + " apmclient.end_transaction(\"redaction_local\", \"success\")\n", + " return text\n", + "\n", + "# Example usage\n", + "text = \"Alice lives in Paris.\"\n", + "redacted_text = redact_named_entities(text)\n", + "print(redacted_text)\n" ] }, { "cell_type": "markdown", "metadata": { - "id": "oYxSkBSJZaAM" + "id": "MeqwLVolG4VQ" }, "source": [ - "#### Step 4: Import the model" + "## Step 6: Test the function on a customer question" ] }, { "cell_type": "code", "execution_count": null, "metadata": { - "id": "A_AL319-dmKJ" + "id": "awtGm2vsrE3k" }, "outputs": [], "source": [ - "load_model(\"dslim/bert-base-NER\", \"ner\")" + "customer_question = f\"\"\"My power was out all last week at my home at\n", + "Grove street. When I talked to my neighbor {generate_random_name()}, they said they got\n", + "rebate on their bill. Can you do the same for me?\"\"\"\n", + "\n", + "print(redact_named_entities(customer_question))\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nuiUxbCkYS3b" + }, + "source": [ + "## Step 7: Alternatively, how would we install the same NER Model into Elasticsarch?" 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "yn7sULLEYW6w" + }, + "outputs": [], + "source": [ + "def load_model(model_id, task_type):\n", + " with tempfile.TemporaryDirectory() as tmp_dir:\n", + " print(f\"Loading HuggingFace transformer tokenizer and model [{model_id}] for task [{task_type}]\" )\n", + "\n", + " tm = TransformerModel(model_id=model_id, task_type=task_type)\n", + " model_path, config, vocab_path = tm.save(tmp_dir)\n", + "\n", + " ptm = PyTorchModel(es, tm.elasticsearch_model_id())\n", + " model_exists = es.options(ignore_status=404).ml.get_trained_models(model_id=ptm.model_id).meta.status == 200\n", + "\n", + " if model_exists:\n", + " print(\"Model has already been imported\")\n", + " else:\n", + " print(\"Importing model\")\n", + " ptm.import_model(model_path=model_path, config_path=None, vocab_path=vocab_path, config=config)\n", + " print(\"Starting model deployment\")\n", + " ptm.start()\n", + " print(f\"Model successfully imported with id '{ptm.model_id}'\")\n", + "\n", + "## Model is pre-loaded into Elasticsearch, but this is how you would do it\n", + "\n", + "## load_model(\"dslim/bert-base-NER\", \"ner\")\n", + "print(\"Model is already loaded\")" ] }, { @@ -889,7 +1060,7 @@ "id": "Lg7MWpurb7g2" }, "source": [ - "### Define the ingest pipeline\n", + "## Step 8: Define a Redaction Ingest Pipeline in Elasticsearch\n", "\n", "We will use the [Elasticsearch Ingest Pipelines](https://www.elastic.co/guide/en/elasticsearch/reference/current/ingest.html) to redact data before it is written to Elasticsearch. These pipelines can also be used to update data in existing indices or for reindexing.\n", "\n", @@ -900,7 +1071,7 @@ "- Removes the `ml` fields added to the document by the inference processor via the [remove processor](https://www.elastic.co/guide/en/elasticsearch/reference/current/remove-processor.html) as they're no longer needed.\n", "- Defines a failure condition to capture any errors, just in case we have them.\n", "\n", - "**NOTE:** As of 8.9, the redact processor is a Technical Preview.\n", + "**NOTE:** As of 8.11, the redact processor is a Technical Preview.\n", "\n", "\n" ] @@ -971,7 +1142,8 @@ " ]\n", "}\n", "\n", - "#es.ingest.put_pipeline(id='redact', body=body)" + "## es.ingest.put_pipeline(id='redact', body=body)\n", + "print(\"Ingest pipeline is already loaded\")" ] }, { @@ -980,7 +1152,7 @@ "id": "Dcv-hhwqlkjD" }, "source": [ - "###Test the pipeline\n", + "## Step 9: Test the pipeline\n", "\n", "Does it work?\n", "\n", @@ -1018,12 +1190,29 @@ { "cell_type": "markdown", "metadata": { - "id": "IOkgdcu9mCIS" + "id": "p6LSsMUyqTdZ" }, "source": [ - "A good next step after validating the pipeline performs as needed is to create role permissions to limit who can see the original `message` vs the new `redacted` version.\n", + "## Step 10: End to End Example, Monitored and Redacted\n", "\n", - "Check out the [Field Level Security documentation](https://www.elastic.co/guide/en/elasticsearch/reference/current/field-level-security.html)" + "Switcing back to the local python model ...\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "iL4A3xUNHQ0K" + }, + "outputs": [], + "source": [ + "customer_question = f\"\"\"My power was out all last week at my home on\n", + "Grove street. When I talked to my neighbor {generate_random_name()}, they said they got\n", + "rebate on their bill. 
Can you do the same for me?\"\"\"\n", + "\n", + "redacted_text = redact_named_entities(customer_question)\n", + "\n", + "print(chatWithPowerAgent(redacted_text))" ] }, { @@ -1034,8 +1223,8 @@ "source": [ "🛑 Stop Here 🛑\n", "\n", - "This Ends Lab 1-3\n", - "
" + "This Ends Lab 2-2\n", + "
\n" ] } ], @@ -1057,7 +1246,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.10.8" + "version": "3.10.13" } }, "nbformat": 4, diff --git a/notebooks/genai_colab_lab_3.ipynb b/notebooks/genai_colab_lab_3.ipynb new file mode 100644 index 0000000..2d70453 --- /dev/null +++ b/notebooks/genai_colab_lab_3.ipynb @@ -0,0 +1,918 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "45iKqhTIUkjf" + }, + "source": [ + "# Setup Environment\n", + "The following code loads the environment variables required to run this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "SO7q2oWEURVE" + }, + "outputs": [], + "source": [ + "FILE=\"GenAI Lab 3\"\n", + "\n", + "# ! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", + "from notebookworkshoploader import loader\n", + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "if os.path.isfile(\"../env\"):\n", + " load_dotenv(\"../env\", override=True)\n", + " print('Successfully loaded environment variables from local env file')\n", + "else:\n", + " loader.load_remote_env(file=FILE, env_url=\"https://notebook-workshop-api-voldmqr2bq-uc.a.run.app\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "y2eoZ4hCUud2" + }, + "outputs": [], + "source": [ + "# ! pip install -qqq langchain==0.1.3 sentence-transformers==2.2.2 beautifulsoup4==4.11.2\n", + "# ! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n", + "# ! pip install -qqq matplotlib==3.8.1 scikit-learn==1.2.2 scipy==1.11.4\n", + "# ! pip install -qqq elasticsearch==8.12.0 inquirer==3.2.1\n", + "! echo \"github codespaces has pre-installed these libraries\"\n", + "\n", + "from sentence_transformers import SentenceTransformer\n", + "from langchain_community.embeddings import HuggingFaceEmbeddings" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "kpsGUDWAX42B" + }, + "outputs": [], + "source": [ + "import os\n", + "from elasticsearch import Elasticsearch\n", + "if 'ELASTIC_CLOUD_ID' in os.environ:\n", + " es = Elasticsearch(\n", + " cloud_id=os.environ['ELASTIC_CLOUD_ID'],\n", + " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", + " request_timeout=30\n", + " )\n", + "elif 'ELASTIC_URL' in os.environ:\n", + " es = Elasticsearch(\n", + " os.environ['ELASTIC_URL'],\n", + " api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']),\n", + " request_timeout=30\n", + " )\n", + "else:\n", + " print(\"env needs to set either ELASTIC_CLOUD_ID or ELASTIC_URL\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "59sPX8_HETbL" + }, + "outputs": [], + "source": [ + "import os, secrets, requests\n", + "import openai\n", + "from openai import OpenAI\n", + "from requests.auth import HTTPBasicAuth\n", + "\n", + "#if using the Elastic AI proxy, then generate the correct API key\n", + "if os.environ['ELASTIC_PROXY'] == \"True\":\n", + "\n", + " if \"OPENAI_API_TYPE\" in os.environ: del os.environ[\"OPENAI_API_TYPE\"]\n", + "\n", + " #generate and share \"your\" unique hash\n", + " os.environ['USER_HASH'] = secrets.token_hex(nbytes=6)\n", + " print(f\"Your unique user hash is: {os.environ['USER_HASH']}\")\n", + "\n", + " #get the current API key and combine with your hash\n", + " os.environ['OPENAI_API_KEY'] = f\"{os.environ['OPENAI_API_KEY']} {os.environ['USER_HASH']}\"\n", + "else:\n", + " 
openai.api_type = os.environ['OPENAI_API_TYPE']\n", + " openai.api_version = os.environ['OPENAI_API_VERSION']\n", + "\n", + "openai.api_key = os.environ['OPENAI_API_KEY']\n", + "openai.api_base = os.environ['OPENAI_API_BASE']\n", + "openai.default_model = os.environ['OPENAI_API_ENGINE']" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Lab 3-1" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iWXrus6La0wF" + }, + "source": [ + "## Step 1 : Simple vectorization using a Vector Embedding model installed to Elasticsearch\n", + "[Model Card - msmarco-MiniLM-L-12-v3](https://huggingface.co/sentence-transformers/msmarco-MiniLM-L-12-v3). - note this model has a 512 token limit" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Sqo3T-LlY_Go" + }, + "outputs": [], + "source": [ + "es_model_id = 'sentence-transformers__msmarco-minilm-l-12-v3'\n", + "\n", + "## use REST call to Elastic to generate Vector Embedding, assumes model is already installed\n", + "def sentence_to_vector_es(chunk, es_model_id=es_model_id):\n", + " docs = [{\"text_field\": chunk}]\n", + " chunk_vector = es.ml.infer_trained_model(model_id=es_model_id, docs=docs, )\n", + " return chunk_vector['inference_results'][0]['predicted_value']\n", + "\n", + "\n", + "chunk = \"The quick brown fox jumped over the lazy dog\"\n", + "es_generated_vector = sentence_to_vector_es(chunk)\n", + "print(f\"Dimensions: {len(es_generated_vector)}, \\nVector preview: {es_generated_vector[:5]+ ['...']}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "El7UrXMcgnns" + }, + "source": [ + "## Step 2: Vectoring Data using a local E5 model and Sentence Transformer\n", + "\n", + "[Model card E5-large-v2](https://huggingface.co/intfloat/e5-large-v2)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "q6a7y24zeBnU" + }, + "outputs": [], + "source": [ + "# from sentence_transformers import SentenceTransformer\n", + "e5_model = SentenceTransformer('intfloat/e5-large-v2')\n", + "input_texts = [\n", + " 'query: how much protein should a female human eat',\n", + " 'query: summit define',\n", + " \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", + " \"passage: Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. 
: 3 a meeting or series of meetings between the leaders of two or more governments.\"\n", + "]\n", + "embeddings = e5_model.encode(input_texts, normalize_embeddings=True)\n", + "close=\" ...]\"\n", + "print(f\"Dimensions: {len(embeddings[0])}, \\nVector preview: {str(embeddings[0][:5])[:-1]+close}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "hVZMHggOg4Vt" + }, + "source": [ + "## Step 3: Doing the same thing but with the LangChain Utility libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "x14UmhoMe6J4" + }, + "outputs": [], + "source": [ + "# from langchain_community.embeddings import HuggingFaceEmbeddings\n", + "langchain_e5_embeddings = HuggingFaceEmbeddings(model_name=\"intfloat/e5-large-v2\")\n", + "input_texts = [\n", + " 'query: how much protein should a female human eat',\n", + " 'query: summit define',\n", + " \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", + " \"passage: Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. : 3 a meeting or series of meetings between the leaders of two or more governments.\"\n", + "]\n", + "embeddings = langchain_e5_embeddings.embed_documents(input_texts)\n", + "close=\", ...]\"\n", + "print(f\"Dimensions: {len(embeddings[0])}, \\nVector preview: {str(embeddings[0][:5])[:-1]+close}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fIBODndAhoH1" + }, + "source": [ + "## Step 4: Let's create a simplified graph of generated Embeddings\n", + "\n", + "Principal Component analysis can be used to simplify higher dimesions into a 2d plot.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "_J1dTm5OiCuY" + }, + "outputs": [], + "source": [ + "# fetch the model and load it\n", + "word_model = SentenceTransformer('sentence-transformers/all-MiniLM-L6-v2')\n", + "print(\"Model dimensions:\", word_model.get_sentence_embedding_dimension())\n", + "\n", + "# generate embeddings\n", + "embeddings_for_cat = word_model.encode(\"cat\")\n", + "embeddings_for_kitten = word_model.encode(\"kitten\")\n", + "embeddings_for_dog = word_model.encode(\"dog\")\n", + "embeddings_for_puppy = word_model.encode(\"puppy\")\n", + "embeddings_for_lawnmower = word_model.encode(\"lawnmower\")\n", + "\n", + "# let's see what we got, though truncate the embeddings to just the first 5 dimensions\n", + "print(f\"embedding dimensions: {embeddings_for_cat.size}\")\n", + "print(f\"cat: {list(embeddings_for_cat)[:5] + ['...']}\")\n", + "print(f\"dog: {list(embeddings_for_dog)[:5] + ['...']}\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "edsUVexdhsKD" + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import altair as alt\n", + "from sklearn.decomposition import PCA\n", + "\n", + "# wrap embeddings with a DataFrame\n", + "df = pd.DataFrame(\n", + " [\n", + " [embeddings_for_cat],\n", + " [embeddings_for_kitten],\n", + " [embeddings_for_dog],\n", + " [embeddings_for_puppy],\n", + " [embeddings_for_lawnmower],\n", + " ],\n", + " index=[\"cat\", \"kitten\", \"dog\", \"puppy\", \"lawnmower\"], columns=[\"embeddings\"]\n", + ")\n", + "\n", 
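+ "# PCA finds the two directions of greatest variance in the high-dimensional\n",
+ "# embedding space and projects each vector onto them, so points that end up\n",
+ "# close together in the 2D plot were already close in the original space.\n",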
+ "# Initialize the PCA reducer to convert embeddings into arrays of length of 2\n", + "reducer = PCA(n_components=2)\n", + "\n", + "# Reduce the embeddings, store them in a new dataframe column and display their shape\n", + "df[\"reduced\"] = reducer.fit_transform(np.stack(df[\"embeddings\"])).tolist()\n", + "\n", + "\n", + "def scatterplot(\n", + " data: pd.DataFrame,\n", + " tooltips=False,\n", + " labels=False,\n", + " width=800,\n", + " height=600,\n", + ") -> alt.Chart:\n", + " base_chart = (\n", + " alt.Chart(data)\n", + " .encode(\n", + " alt.X(\"x\", scale=alt.Scale(zero=False)),\n", + " alt.Y(\"y\", scale=alt.Scale(zero=False)),\n", + " )\n", + " .properties(width=width, height=height)\n", + " )\n", + "\n", + " if tooltips:\n", + " base_chart = base_chart.encode(alt.Tooltip([\"text\"]))\n", + "\n", + " circles = base_chart.mark_circle(\n", + " size=200, color=\"crimson\", stroke=\"white\", strokeWidth=1\n", + " )\n", + "\n", + " if labels:\n", + " labels = base_chart.mark_text(\n", + " fontSize=13,\n", + " align=\"left\",\n", + " baseline=\"bottom\",\n", + " dx=5,\n", + " ).encode(text=\"text\")\n", + " chart = circles + labels\n", + " else:\n", + " chart = circles\n", + "\n", + " return chart\n", + "\n", + "source = pd.DataFrame(\n", + " {\n", + " \"text\": df.index,\n", + " \"x\": df[\"reduced\"].apply(lambda x: x[0]).to_list(),\n", + " \"y\": df[\"reduced\"].apply(lambda x: x[1]).to_list(),\n", + " }\n", + ")\n", + "\n", + "scatterplot(source, labels=True, width=400, height=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "yLrQl_eKnbjg" + }, + "source": [ + "## Step 5 - using the more advanced e5 model, see that questions can be matched with answers" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GXkl-MA7kyQw" + }, + "outputs": [], + "source": [ + "## using e5_model previously loaded\n", + "input_texts = [\n", + " 'query: how much protein should a female human eat',\n", + " 'query: summit define',\n", + " \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", + " \"passage: Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. 
: 3 a meeting or series of meetings between the leaders of two or more governments.\",\n", + " \"passage: I am the very model of a modern Major-General / I've information vegetable, animal, and mineral / I know the kings of England, and I quote the fights historical / From Marathon to Waterloo, in order categorical / I'm very well acquainted, too, with matters mathematical\",\n", + " \"passage: When, in the course of human events, it becomes necessary for one people to dissolve the political bands which have connected them with another, and to assume, among the powers of the earth\",\n", + " \"passage: It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.\",\n", + "]\n", + "embeddings = e5_model.encode(input_texts, normalize_embeddings=True)\n", + "\n", + "\n", + "# let's see what we got, though truncate the embeddings to just the first 5 dimensions\n", + "print(f\"embedding dimensions: {embeddings[0].size}\")\n", + "print(f\"first query: {list(embeddings[0])[:3] + ['...']}\")\n", + "\n", + "\n", + "# wrap embeddings with a DataFrame\n", + "df = pd.DataFrame(\n", + " [\n", + " [embeddings[0]],\n", + " [embeddings[1]],\n", + " [embeddings[2]],\n", + " [embeddings[3]],\n", + " [embeddings[4]],\n", + " [embeddings[5]],\n", + " [embeddings[6]],\n", + " ],\n", + " index=[\n", + " \"q: protein\",\n", + " \"q: summit\",\n", + " \"p: protein guide\",\n", + " \"p: summit def\",\n", + " \"p: penzanse\",\n", + " \"p: dec of ind\",\n", + " \"p: austen\"\n", + " ], columns=[\"embeddings\"]\n", + ")\n", + "\n", + "# Initialize the PCA reducer to convert embeddings into arrays of length of 2\n", + "reducer = PCA(n_components=2)\n", + "\n", + "# Reduce the embeddings, store them in a new dataframe column and display their shape\n", + "df[\"reduced\"] = reducer.fit_transform(np.stack(df[\"embeddings\"])).tolist()\n", + "\n", + "source = pd.DataFrame(\n", + " {\n", + " \"text\": df.index,\n", + " \"x\": df[\"reduced\"].apply(lambda x: x[0]).to_list(),\n", + " \"y\": df[\"reduced\"].apply(lambda x: x[1]).to_list(),\n", + " }\n", + ")\n", + "\n", + "scatterplot(source, labels=True, width=400, height=300)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "738GN3qE2lM8" + }, + "source": [ + "## Step 6 : calculate the actual distance in 1024 dimensional space" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "QKAT1UBAzZfq" + }, + "outputs": [], + "source": [ + "from scipy.spatial import distance\n", + "\n", + "passages = [\n", + " \"passage: As a general guideline, the CDC's average requirement of protein for women ages 19 to 70 is 46 grams per day. But, as you can see from this chart, you'll need to increase that if you're expecting or training for a marathon. Check out the chart below to see how much protein you should be eating each day.\",\n", + " \"passage: Definition of summit for English Language Learners. : 1 the highest point of a mountain : the top of a mountain. : 2 the highest level. 
: 3 a meeting or series of meetings between the leaders of two or more governments.\",\n", + " \"passage: I am the very model of a modern Major-General / I've information vegetable, animal, and mineral / I know the kings of England, and I quote the fights historical / From Marathon to Waterloo, in order categorical / I'm very well acquainted, too, with matters mathematical\",\n", + " \"passage: When, in the course of human events, it becomes necessary for one people to dissolve the political bands which have connected them with another, and to assume, among the powers of the earth\",\n", + " \"passage: It is a truth universally acknowledged, that a single man in possession of a good fortune, must be in want of a wife.\",\n", + "]\n", + "\n", + "def chunks_by_distance(passages, query_text, model):\n", + " embeddings = model.encode(passages, normalize_embeddings=True)\n", + " query_embedding = model.encode(query_text, normalize_embeddings=True)\n", + " distances = []\n", + " for index, passage in enumerate(passages):\n", + " cos_distance = distance.cosine(embeddings[index], query_embedding)\n", + " distances.append((passage, cos_distance))\n", + " sorted_passages = sorted(distances, key=lambda x: x[1])\n", + "\n", + " return sorted_passages\n", + "\n", + "protein_query = 'query: how much protein should a female human eat'\n", + "sorted_passages = chunks_by_distance(passages, protein_query, e5_model)\n", + "\n", + "for passage, dist in sorted_passages:\n", + " print(f\"{passage[:40]} - Cosine distance {dist:.12f}\")\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "fTRQlTcO_jC4" + }, + "source": [ + "## OKAY let's work with an actual large document" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "oxo004rhYcDH" + }, + "outputs": [], + "source": [ + "wikipedia_spacecraft = [\n", + "{\n", + " \"id\": \"37910\",\n", + " \"title\": \"Spacecraft\",\n", + " \"text\": \"A spacecraft is a vehicle that is designed to fly in outer space. A type of artificial satellite, spacecraft are used for a variety of purposes, including communications, Earth observation, meteorology, navigation, space colonization, planetary exploration, and transportation of humans and cargo. All spacecraft except single-stage-to-orbit vehicles cannot get into space on their own, and require a launch vehicle (carrier rocket). On a sub-orbital spaceflight, a space vehicle enters space and then returns to the surface without having gained sufficient energy or velocity to make a full Earth orbit. For orbital spaceflights, spacecraft enter closed orbits around the Earth or around other celestial bodies. Spacecraft used for human spaceflight carry people on board as crew or passengers from start or on orbit (space stations) only, whereas those used for robotic space missions operate either autonomously or telerobotically. Robotic spacecraft used to support scientific research are space probes. Robotic spacecraft that remain in orbit around a planetary body are artificial satellites. To date, only a handful of interstellar probes, such as Pioneer 10 and 11, Voyager 1 and 2, and New Horizons, are on trajectories that leave the Solar System. Orbital spacecraft may be recoverable or not. Most are not. Recoverable spacecraft may be subdivided by a method of reentry to Earth into non-winged space capsules and winged spaceplanes. 
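A sanity check on what `distance.cosine` returns in the ranking above: for unit-length vectors it is simply `1 - dot product`, so smaller values mean more similar. A minimal, self-contained sketch with toy vectors (no model required):

```python
import numpy as np
from scipy.spatial import distance

u = np.array([1.0, 0.0, 0.0])
v = np.array([0.6, 0.8, 0.0])   # already unit length

cos_sim = float(u @ v)            # 0.6
cos_dist = distance.cosine(u, v)  # 1 - 0.6 = 0.4

print(cos_sim, cos_dist)
assert abs(cos_dist - (1.0 - cos_sim)) < 1e-9
```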
Recoverable spacecraft may be reusable (can be launched again or several times, like the SpaceX Dragon and the Space Shuttle orbiters) or expendable (like the Soyuz). In recent years, more space agencies are tending towards reusable spacecraft. Humanity has achieved space flight, but only a few nations have the technology for orbital launches: Russia (RSA or \\\"Roscosmos\\\"), the United States (NASA), the member states of the European Space Agency (ESA), Japan (JAXA), China (CNSA), India (ISRO), Taiwan National Chung-Shan Institute of Science and Technology, Taiwan National Space Organization (NSPO), Israel (ISA), Iran (ISA), and North Korea (NADA). In addition, several private companies have developed or are developing the technology for orbital launches independently from government agencies. The most prominent examples of such companies are SpaceX and Blue Origin. ==History== A German V-2 became the first spacecraft when it reached an altitude of 189 km in June 1944 in Peenemünde, Germany.Peenemünde (Dokumentation) Berlin: Moewig, 1984.. Sputnik 1 was the first artificial satellite. It was launched into an elliptical low Earth orbit (LEO) by the Soviet Union on 4 October 1957. The launch ushered in new political, military, technological, and scientific developments; while the Sputnik launch was a single event, it marked the start of the Space Age.Dougall, Walter A. (Winter 2010) \\\"Shooting the duck\\\", American Heritage Apart from its value as a technological first, Sputnik 1 also helped to identify the upper atmospheric layer's density, by measuring the satellite's orbital changes. It also provided data on radio-signal distribution in the ionosphere. Pressurized nitrogen in the satellite's false body provided the first opportunity for meteoroid detection. Sputnik 1 was launched during the International Geophysical Year from Site No.1/5, at the 5th Tyuratam range, in Kazakh SSR (now at the Baikonur Cosmodrome). The satellite travelled at , taking 96.2 minutes to complete an orbit, and emitted radio signals at 20.005 and 40.002 MHz While Sputnik 1 was the first spacecraft to orbit the Earth, other human- made objects had previously reached an altitude of 100 km, which is the height required by the international organization Fédération Aéronautique Internationale to count as a spaceflight. This altitude is called the Kármán line. In particular, in the 1940s there were several test launches of the V-2 rocket, some of which reached altitudes well over 100 km. ==Spacecraft types== ===Crewed spacecraft=== thumb|Apollo 17 command module in Lunar orbit As of 2016, only three nations have flown crewed spacecraft: USSR/Russia, USA, and China. The first crewed spacecraft was Vostok 1, which carried Soviet cosmonaut Yuri Gagarin into space in 1961, and completed a full Earth orbit. There were five other crewed missions which used a Vostok spacecraft. The second crewed spacecraft was named Freedom 7, and it performed a sub-orbital spaceflight in 1961 carrying American astronaut Alan Shepard to an altitude of just over . There were five other crewed missions using Mercury spacecraft. Other Soviet crewed spacecraft include the Voskhod, Soyuz, flown uncrewed as Zond/L1, L3, TKS, and the Salyut and Mir crewed space stations. 
Other American crewed spacecraft include the Gemini spacecraft, the Apollo spacecraft including the Apollo Lunar Module, the Skylab space station, the Space Shuttle with undetached European Spacelab and private US Spacehab space stations- modules, and the SpaceX Crew Dragon configuration of their Dragon 2. US company Boeing also developed and flown a spacecraft of their own, the CST-100, commonly referred to as Starliner, but a crewed flight is yet to occur. China developed, but did not fly Shuguang, and is currently using Shenzhou (its first crewed mission was in 2003). Except for the Space Shuttle, all of the recoverable crewed orbital spacecraft were space capsules. File:NASA spacecraft comparison.jpg|alt=Drawings of Mercury, Gemini capsules and Apollo spacecraft, with their launch vehicles|American Mercury, Gemini, and Apollo spacecraft File:Vostok Spacecraft Diagram.svg|Soviet Vostok capsule File:Voskhod 1 and 2.svg|alt=Line drawing of Voskhod capsules|Soviet Voskhod (variant of Vostok) File:Soyuz 7K-OK(A) drawing.svg|alt=Soyuz 7K-OK(A) drawing|1967 Soviet/Russian Soyuz spacecraft File:Post S-7 Shenzhou spacecraft.png|alt=Drawing of Shenzhou spacecraft|Chinese Shenzhou spacecraft The International Space Station, crewed since November 2000, is a joint venture between Russia, the United States, Canada and several other countries. ====Spaceplanes==== thumb|Columbia orbiter landing Spaceplanes are spacecraft that are built in the shape of, and function as, airplanes. The first example of such was the North American X-15 spaceplane, which conducted two crewed flights which reached an altitude of over 100 km in the 1960s. This first reusable spacecraft was air-launched on a suborbital trajectory on July 19, 1963. The first partially reusable orbital spacecraft, a winged non-capsule, the Space Shuttle, was launched by the USA on the 20th anniversary of Yuri Gagarin's flight, on April 12, 1981. During the Shuttle era, six orbiters were built, all of which have flown in the atmosphere and five of which have flown in space. Enterprise was used only for approach and landing tests, launching from the back of a Boeing 747 SCA and gliding to deadstick landings at Edwards AFB, California. The first Space Shuttle to fly into space was Columbia, followed by Challenger, Discovery, Atlantis, and Endeavour. Endeavour was built to replace Challenger when it was lost in January 1986. Columbia broke up during reentry in February 2003. The first automatic partially reusable spacecraft was the Buran-class shuttle, launched by the USSR on November 15, 1988, although it made only one flight and this was uncrewed. This spaceplane was designed for a crew and strongly resembled the U.S. Space Shuttle, although its drop-off boosters used liquid propellants and its main engines were located at the base of what would be the external tank in the American Shuttle. Lack of funding, complicated by the dissolution of the USSR, prevented any further flights of Buran. The Space Shuttle was subsequently modified to allow for autonomous re-entry in case of necessity. Per the Vision for Space Exploration, the Space Shuttle was retired in 2011 mainly due to its old age and high cost of program reaching over a billion dollars per flight. The Shuttle's human transport role is to be replaced by SpaceX's SpaceX Dragon 2 and Boeing's CST-100 Starliner. Dragon 2's first crewed flight occurred on May 30, 2020. 
The Shuttle's heavy cargo transport role is to be replaced by expendable rockets such as the Space Launch System and ULA's Vulcan rocket, as well as the commercial launch vehicles. Scaled Composites' SpaceShipOne was a reusable suborbital spaceplane that carried pilots Mike Melvill and Brian Binnie on consecutive flights in 2004 to win the Ansari X Prize. The Spaceship Company will build its successor SpaceShipTwo. A fleet of SpaceShipTwos operated by Virgin Galactic was planned to begin reusable private spaceflight carrying paying passengers in 2014, but was delayed after the crash of VSS Enterprise. ===Uncrewed spacecraft=== Uncrewed spacecraft are spacecraft without people on board. Uncrewed spacecraft may have varying levels of autonomy from human input; they may be remote controlled, remote guided or even autonomous, meaning they have a pre-programmed list of operations, which they will execute unless otherwise instructed. Many space missions are more suited to telerobotic rather than crewed operation, due to lower cost and lower risk factors. In addition, some planetary destinations such as Venus or the vicinity of Jupiter are too hostile for human survival. Outer planets such as Saturn, Uranus, and Neptune are too distant to reach with current crewed spaceflight technology, so telerobotic probes are the only way to explore them. Telerobotics also allows exploration of regions that are vulnerable to contamination by Earth micro-organisms since spacecraft can be sterilized. Humans can not be sterilized in the same way as a spaceship, as they coexist with numerous micro-organisms, and these micro-organisms are also hard to contain within a spaceship or spacesuit. Multiple space probes were sent to study Moon, the planets, the Sun, multiple small Solar System bodies (comets and asteroids). Special class of uncrewed spacecraft is space telescopes, a telescope in outer space used to observe astronomical objects. The first operational telescopes were the American Orbiting Astronomical Observatory, OAO-2 launched in 1968, and the Soviet Orion 1 ultraviolet telescope aboard space station Salyut 1 in 1971. Space telescopes avoid the filtering and distortion (scintillation) of electromagnetic radiation which they observe, and avoid light pollution which ground-based observatories encounter. The best-known examples are Hubble Space Telescope and James Webb Space Telescope. Cargo spacecraft are designed to carry cargo, possibly to support space stations' operation by transporting food, propellant and other supplies. Automated cargo spacecraft have been used since 1978 and have serviced Salyut 6, Salyut 7, Mir, the International Space Station and Tiangong space station. ====Fastest spacecraft==== *Parker Solar Probe (estimated at first sun close pass, will reach at final perihelion) *Helios I and II Solar Probes () ==== Furthest spacecraft from the Sun ==== * Voyager 1 at 156.13 AU as of April 2022, traveling outward at about * Pioneer 10 at 122.48 AU as of December 2018, traveling outward at about *Voyager 2 at 122.82 AU as of January 2020, traveling outward at about *Pioneer 11 at 101.17 AU as of December 2018, traveling outward at about ==Subsystems== A spacecraft astrionics system comprises different subsystems, depending on the mission profile. 
Spacecraft subsystems comprise the spacecraft's bus and may include attitude determination and control (variously called ADAC, ADC, or ACS), guidance, navigation and control (GNC or GN&C;), communications (comms), command and data handling (CDH or C&DH;), power (EPS), thermal control (TCS), propulsion, and structures. Attached to the bus are typically payloads. ; Life support : Spacecraft intended for human spaceflight must also include a life support system for the crew. ; Attitude control : A Spacecraft needs an attitude control subsystem to be correctly oriented in space and respond to external torques and forces properly. The attitude control subsystem consists of sensors and actuators, together with controlling algorithms. The attitude- control subsystem permits proper pointing for the science objective, sun pointing for power to the solar arrays and earth pointing for communications. ; GNC : Guidance refers to the calculation of the commands (usually done by the CDH subsystem) needed to steer the spacecraft where it is desired to be. Navigation means determining a spacecraft's orbital elements or position. Control means adjusting the path of the spacecraft to meet mission requirements. ; Command and data handling : The C&DH; subsystem receives commands from the communications subsystem, performs validation and decoding of the commands, and distributes the commands to the appropriate spacecraft subsystems and components. The CDH also receives housekeeping data and science data from the other spacecraft subsystems and components, and packages the data for storage on a data recorder or transmission to the ground via the communications subsystem. Other functions of the CDH include maintaining the spacecraft clock and state-of-health monitoring. ; Communications : Spacecraft, both robotic and crewed, have various communications systems for communication with terrestrial stations and for inter-satellite service. Technologies include space radio station and optical communication. In addition, some spacecraft payloads are explicitly for the purpose of ground–ground communication using receiver/retransmitter electronic technologies. ; Power : Spacecraft need an electrical power generation and distribution subsystem for powering the various spacecraft subsystems. For spacecraft near the Sun, solar panels are frequently used to generate electrical power. Spacecraft designed to operate in more distant locations, for example Jupiter, might employ a radioisotope thermoelectric generator (RTG) to generate electrical power. Electrical power is sent through power conditioning equipment before it passes through a power distribution unit over an electrical bus to other spacecraft components. Batteries are typically connected to the bus via a battery charge regulator, and the batteries are used to provide electrical power during periods when primary power is not available, for example when a low Earth orbit spacecraft is eclipsed by Earth. ; Thermal control : Spacecraft must be engineered to withstand transit through Earth's atmosphere and the space environment. They must operate in a vacuum with temperatures potentially ranging across hundreds of degrees Celsius as well as (if subject to reentry) in the presence of plasmas. Material requirements are such that either high melting temperature, low density materials such as beryllium and reinforced carbon–carbon or (possibly due to the lower thickness requirements despite its high density) tungsten or ablative carbon–carbon composites are used. 
Depending on mission profile, spacecraft may also need to operate on the surface of another planetary body. The thermal control subsystem can be passive, dependent on the selection of materials with specific radiative properties. Active thermal control makes use of electrical heaters and certain actuators such as louvers to control temperature ranges of equipments within specific ranges. ; Spacecraft propulsion : Spacecraft may or may not have a propulsion subsystem, depending on whether or not the mission profile calls for propulsion. The Swift spacecraft is an example of a spacecraft that does not have a propulsion subsystem. Typically though, LEO spacecraft include a propulsion subsystem for altitude adjustments (drag make-up maneuvers) and inclination adjustment maneuvers. A propulsion system is also needed for spacecraft that perform momentum management maneuvers. Components of a conventional propulsion subsystem include fuel, tankage, valves, pipes, and thrusters. The thermal control system interfaces with the propulsion subsystem by monitoring the temperature of those components, and by preheating tanks and thrusters in preparation for a spacecraft maneuver. ; Structures : Spacecraft must be engineered to withstand launch loads imparted by the launch vehicle, and must have a point of attachment for all the other subsystems. Depending on mission profile, the structural subsystem might need to withstand loads imparted by entry into the atmosphere of another planetary body, and landing on the surface of another planetary body. ; Payload : The payload depends on the mission of the spacecraft, and is typically regarded as the part of the spacecraft \\\"that pays the bills\\\". Typical payloads could include scientific instruments (cameras, telescopes, or particle detectors, for example), cargo, or a human crew. ; Ground segment : The ground segment, though not technically part of the spacecraft, is vital to the operation of the spacecraft. Typical components of a ground segment in use during normal operations include a mission operations facility where the flight operations team conducts the operations of the spacecraft, a data processing and storage facility, ground stations to radiate signals to and receive signals from the spacecraft, and a voice and data communications network to connect all mission elements. ; Launch vehicle : The launch vehicle propels the spacecraft from Earth's surface, through the atmosphere, and into an orbit, the exact orbit being dependent on the mission configuration. The launch vehicle may be expendable or reusable. ==See also== *Astrionics *Commercial astronaut *Flying saucer *List of crewed spacecraft *List of fictional spacecraft *NewSpace *Spacecraft design *Space exploration *Space launch *Spaceships in science fiction *Space suit *Spaceflight records *Starship *Timeline of Solar System exploration *U.S. Space Exploration History on U.S. 
Stamps == References == === Citations === === Sources === * * ==External links== *NASA: Space Science Spacecraft Missions *NSSDC Master Catalog Spacecraft Query Form *Early History of Spacecraft *Basics of Spaceflight tutorial from JPL/Caltech *International Spaceflight Museum Category:Astronautics Category:Pressure vessels\",\n", + " \"categories\": [\n", + " \"Astronautics\",\n", + " \"Pressure vessels\"\n", + " ]\n", + "},\n", + " ]" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "0dwj-z5z7tnu" + }, + "source": [ + "## Step 7: Truncation is a problem for long texts\n", + "\n", + "The semantic relevance will be low because most of the text is ignored in the vector computation." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xUYUyZ4G3-ns" + }, + "outputs": [], + "source": [ + "text = wikipedia_spacecraft[0][\"text\"]\n", + "embeddings = e5_model.encode(text, normalize_embeddings=True)\n", + "\n", + "tokenized_text = e5_model.tokenizer(text)[\"input_ids\"]\n", + "model_max_seq_length = e5_model.get_max_seq_length()\n", + "text_token_count = len(tokenized_text)\n", + "\n", + "print(f\"text tokens {text_token_count} | model max sequence length {model_max_seq_length}\")\n", + "\n", + "if text_token_count > model_max_seq_length:\n", + " print(f\"❗❗ The text will be truncated.❗❗\")\n", + "else:\n", + " print(f\"The text will not be truncated.\")" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "g0e_QS-i8rkm" + }, + "source": [ + "## Step 8: Visualizing Chunking Strategies\n", + "\n", + "First some utility libraries" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "OSMKdU-d8yzw" + }, + "outputs": [], + "source": [ + "# Import Libraries\n", + "import os\n", + "import json\n", + "import textwrap\n", + "from pprint import pprint\n", + "from bs4 import BeautifulSoup\n", + "from IPython.display import HTML\n", + "#from elasticsearch import Elasticsearch, helpers\n", + "from langchain.text_splitter import RecursiveCharacterTextSplitter, \\\n", + " SentenceTransformersTokenTextSplitter, \\\n", + " CharacterTextSplitter, \\\n", + " TextSplitter\n", + "\n", + "## Process splitting and display\n", + "def split_and_print(documents, splitter, ret=False):\n", + " es_docs = []\n", + " for doc in documents:\n", + " passages = []\n", + "\n", + " for chunk in splitter.split_text(doc['text']):\n", + " passages.append({\n", + " \"text\": chunk,\n", + " })\n", + " es_docs.append(passages)\n", + "\n", + " print(f'Number of chunks: {len(passages)}' + '\\n')\n", + " display(HTML(process_chunks(passages)))\n", + " if ret:\n", + " return passages\n", + " else:\n", + " return False\n", + "\n", + "\n", + "## Character Splitter\n", + "def split_by_recursive_char(documents,\n", + " chunk_size: int = 200,\n", + " chunk_overlap: int = 0\n", + " ):\n", + " '''Chunking by character count'''\n", + "\n", + " text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=chunk_size,\n", + " chunk_overlap=chunk_overlap,\n", + " length_function=len,\n", + " is_separator_regex=False,\n", + " )\n", + " split_and_print(documents, text_splitter)\n", + "\n", + "\n", + "def split_by_text(documents,\n", + " chunk_size: int = 200,\n", + " chunk_overlap: int = 0\n", + " ):\n", + " '''Chunking by character count'''\n", + "\n", + " text_splitter = CharacterTextSplitter(\n", + " chunk_size=chunk_size,\n", + " chunk_overlap=chunk_overlap,\n", + " length_function=len,\n", + " is_separator_regex=False,\n", + " )\n", + " r = 
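To see that the overflow really is ignored rather than just warned about, one hedged check (assuming the `e5_model` and `text` from the Step 7 cell; results may differ very slightly because decoding tokens back to text is not a perfect round trip) is to embed the full article and a version cut at the model's limit, then compare the two vectors:

```python
import numpy as np

max_len = e5_model.get_max_seq_length()
token_ids = e5_model.tokenizer(text)["input_ids"]

# Rebuild only the text that actually fits inside the model's window
truncated_text = e5_model.tokenizer.decode(token_ids[:max_len], skip_special_tokens=True)

full_vec = e5_model.encode(text, normalize_embeddings=True)
head_vec = e5_model.encode(truncated_text, normalize_embeddings=True)

# A similarity of ~1.0 means everything beyond the window contributed nothing
print(f"cosine similarity, full text vs head-only: {float(full_vec @ head_vec):.4f}")
```

This is the motivation for the chunking strategies in the next steps: split the document so every chunk fits the model, instead of silently dropping most of it.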
split_and_print(documents, text_splitter)\n", + "\n", + "\n", + "\n", + "## Token Splitter\n", + "def split_by_token(documents,\n", + " tokens_per_chunk: int = 2,\n", + " chunk_overlap: int = 0,\n", + " ret=False\n", + " ):\n", + " '''Chunking by BERT Transformer Tokens'''\n", + "\n", + " text_splitter = SentenceTransformersTokenTextSplitter(\n", + " tokens_per_chunk=tokens_per_chunk,\n", + " chunk_overlap=chunk_overlap,\n", + " model_name='intfloat/e5-large-v2' # 512 token input limit\n", + " )\n", + " r = split_and_print(documents, text_splitter, ret=ret)\n", + " if ret:\n", + " return r\n", + "\n", + "\n", + "\n", + "\n", + "## Printing and Highlighting functions ##\n", + "\n", + "color_list = [\n", + " \"yellow\",\n", + " \"red\",\n", + " \"lightgreen\",\n", + " \"lightblue\",\n", + " \"lightpink\",\n", + " \"#F0A3FF\", # Vivid orchid\n", + " \"#0075DC\", # Blue ribbon\n", + " \"#2BCE48\", # Slimy green\n", + " \"#FFCC99\", # Peach-orange\n", + " \"#94FFB5\", # Mint green\n", + "\n", + "]\n", + "\n", + "def find_overlap(text1, text2):\n", + " min_len = min(len(text1), len(text2))\n", + " for i in range(min_len, 0, -1):\n", + " if text1[-i:] == text2[:i]:\n", + " return text1[-i:]\n", + " return ''\n", + "\n", + "###################################################################################\n", + "# Highted text -> White\n", + "# Normal text -> Black\n", + "\n", + "### Uncomment these 3 functions if you are running in light mode\n", + "\n", + "# def highlight_first_occurrence(text, substring, color):\n", + "# index = text.find(substring)\n", + "# if index != -1:\n", + "# return (text[:index] +\n", + "# f\"{text[index:index+len(substring)]}\" +\n", + "# text[index+len(substring):])\n", + "# return text\n", + "\n", + "# def highlight_last_occurrence(text, substring, color):\n", + "# index = text.rfind(substring)\n", + "# if index != -1:\n", + "# return (text[:index] +\n", + "# f\"{text[index:index+len(substring)]}\" +\n", + "# text[index+len(substring):])\n", + "# return text\n", + "\n", + "# def process_chunks(chunks, colors=color_list):\n", + "# html_output = \"\"\n", + "# for i in range(len(chunks) - 1):\n", + "# overlap = find_overlap(chunks[i][\"text\"], chunks[i + 1][\"text\"])\n", + "# color = colors[i % len(colors)] # Cycle through the provided colors\n", + "# if overlap:\n", + "# chunks[i][\"text\"] = highlight_last_occurrence(chunks[i][\"text\"], overlap, color)\n", + "# chunks[i + 1][\"text\"] = highlight_first_occurrence(chunks[i + 1][\"text\"], overlap, color)\n", + "# html_output += chunks[i][\"text\"] + \"

\"\n", + "# html_output += chunks[-1][\"text\"] # Add the last chunk\n", + "# return html_output\n", + "\n", + "###################################################################################\n", + "# Highted text -> Black\n", + "# Normal text -> White\n", + "\n", + "### Comment out these 3 functions if running in light modes\n", + "\n", + "def highlight_first_occurrence(text, substring, color):\n", + " index = text.find(substring)\n", + " if index != -1:\n", + " return (text[:index] +\n", + " f\"{text[index:index+len(substring)]}\" +\n", + " text[index+len(substring):])\n", + " return text\n", + "\n", + "def highlight_last_occurrence(text, substring, color):\n", + " index = text.rfind(substring)\n", + " if index != -1:\n", + " return (text[:index] +\n", + " f\"{text[index:index+len(substring)]}\" +\n", + " text[index+len(substring):])\n", + " return text\n", + "\n", + "\n", + "chunk_max_display = 10\n", + "\n", + "def process_chunks(chunks, colors=color_list):\n", + " html_output = \"\"\n", + " for i in range(min(chunk_max_display -1,len(chunks) - 1)):\n", + " overlap = find_overlap(chunks[i][\"text\"], chunks[i + 1][\"text\"])\n", + " color = colors[i % len(colors)] # Cycle through the provided colors\n", + " if overlap:\n", + " chunks[i][\"text\"] = highlight_last_occurrence(chunks[i][\"text\"], overlap, color)\n", + " chunks[i + 1][\"text\"] = highlight_first_occurrence(chunks[i + 1][\"text\"], overlap, color)\n", + " # Wrap each chunk of text in a span with white text color\n", + " html_output += f\"{chunks[i]['text']}

\"\n", + " # Add the last chunk with white text color\n", + " html_output += f\"{chunks[-1]['text']}\"\n", + " html_output += f\"

... additional chunks omitted\"\n", + " return html_output" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "TZbfLoQhRFXy" + }, + "source": [ + "## Step 9: Three Chunking Strategies\n", + "\n", + "[LangChain recursive character text splitter](https://python.langchain.com/docs/modules/data_connection/document_transformers/recursive_text_splitter)\n", + "\n", + "[LangChain splitting by tokens](https://python.langchain.com/docs/modules/data_connection/document_transformers/split_by_token)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "b0ClYN7K9NF6" + }, + "outputs": [], + "source": [ + "split_by_recursive_char(wikipedia_spacecraft, chunk_size=1024, chunk_overlap=0)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "GXAMcK2I9vop" + }, + "outputs": [], + "source": [ + "split_by_recursive_char(wikipedia_spacecraft, chunk_size=1024, chunk_overlap=50)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "LKliFbbn97ky" + }, + "outputs": [], + "source": [ + "token_c500_o0 = split_by_token(wikipedia_spacecraft, tokens_per_chunk=500, chunk_overlap=0, ret=True)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aAI1bgTR-ZXw" + }, + "outputs": [], + "source": [ + "token_c500_o250 = split_by_token(wikipedia_spacecraft, tokens_per_chunk=500, chunk_overlap=100, ret=True)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "BUApSzG6SXfK" + }, + "source": [ + "## Step 10: Let's comapare using the whole passage vs the best chunk with ChatGPT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "xpjUDLUKEtSi" + }, + "outputs": [], + "source": [ + "the_full_text = wikipedia_spacecraft[0][\"text\"]\n", + "\n", + "question = \"What three countries have flown manned spacecraft?\"\n", + "\n", + "def gen_system_prompt(context):\n", + " return f\"\"\"You are an AI assistant than answers questions based on the provided context.\n", + "Use only the provided context. 
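A small standalone sketch of what `chunk_overlap` does to the LangChain splitter used in the cells above (toy text, same `RecursiveCharacterTextSplitter`; the exact chunk boundaries depend on the separators the recursive splitter finds):

```python
from langchain.text_splitter import RecursiveCharacterTextSplitter

toy_text = ("Spacecraft are vehicles designed to fly in outer space. "
            "They are used for communications, Earth observation, navigation, "
            "planetary exploration, and transportation of humans and cargo.")

for overlap in (0, 30):
    splitter = RecursiveCharacterTextSplitter(
        chunk_size=80,
        chunk_overlap=overlap,
        length_function=len,
        is_separator_regex=False,
    )
    chunks = splitter.split_text(toy_text)
    print(f"\nchunk_overlap={overlap} -> {len(chunks)} chunks")
    for chunk in chunks:
        print(f"  [{len(chunk):>3}] {chunk}")
```

With a non-zero overlap, the tail of each chunk is repeated at the start of the next one, which is exactly the region `process_chunks` highlights in colour above.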
If the provided context does not have the answer\n", + "reply only with 'I do not know'\n", + "\n", + "Context: {context}\"\"\"\n", + "\n", + "import textwrap\n", + "# wrap text when printing, because colab scrolls output to the right too much\n", + "def wrap_text(text, width):\n", + " wrapped_text = textwrap.wrap(text, width)\n", + " return '\\n'.join(wrapped_text)\n", + "\n", + "def print_light_blue(text):\n", + " print(f'\\033[94m{text}\\033[0m')\n", + "\n", + "def chatCompletion(messages):\n", + "\n", + " client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", + " completion = client.chat.completions.create(\n", + " model=openai.default_model,\n", + " max_tokens=150,\n", + " messages=messages\n", + " )\n", + " print_light_blue(f\"\\t{completion.usage}\")\n", + "\n", + " return completion\n", + "\n", + "def chatWithSpacePassage(prompt, context):\n", + " messages = [\n", + " {\"role\": \"system\", \"content\": gen_system_prompt(context)},\n", + " {\"role\": \"user\", \"content\": prompt}\n", + " ]\n", + " print_light_blue(\"Prompt:\")\n", + " print_light_blue(wrap_text(messages[1][\"content\"],70))\n", + " completion = chatCompletion(messages)\n", + "\n", + " response_text = completion.choices[0].message.content\n", + "\n", + " return wrap_text(response_text,70)\n", + "\n", + "\n", + "ai_response = chatWithSpacePassage(question, the_full_text)\n", + "\n", + "print(ai_response)\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "nI6AQ0ACSZ9n" + }, + "source": [ + "## Step 11: Reducing LLM inference costs by 91%\n", + "\n", + "We'll deep dive into how to use Elasticearch to speed up the vector search and other kinds of Search Powered AI in the next part of the workshop.\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "RD6Znhd7Gfiu" + }, + "outputs": [], + "source": [ + "# the_full_text = wikipedia_spacecraft[0][\"text\"]\n", + "\n", + "text_splitter = RecursiveCharacterTextSplitter(\n", + " chunk_size=1024,\n", + " chunk_overlap=50,\n", + " length_function=len\n", + ")\n", + "\n", + "chunks = text_splitter.split_text(the_full_text)\n", + "\n", + "## Vectorizing can take time so I'm only processing the first few chunks\n", + "sorted_chunks = chunks_by_distance(chunks[:5], question, e5_model)\n", + "\n", + "## top 3 chunk distances\n", + "for passage, dist in sorted_chunks[:3]:\n", + " print(f\"{passage[:40]} - Cosine distance {dist:.12f}\")\n", + "print(\"\")\n", + "\n", + "top_passage = sorted_chunks[0][0]\n", + "print(wrap_text(top_passage, 70))\n", + "print(\"\")\n", + "\n", + "ai_response = chatWithSpacePassage(question, top_passage)\n", + "print(ai_response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "🛑 Stop Here 🛑\n", + "\n", + "This Ends Lab 3-1\n", + "
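The cost claim in Step 11 is easy to sanity-check. A hedged sketch (using `tiktoken`, which this notebook installs; the exact percentage depends on the article, the chunking settings, and the model's tokenizer) comparing the prompt tokens needed for the whole article versus the single best chunk:

```python
import tiktoken

enc = tiktoken.get_encoding("cl100k_base")

# the_full_text and top_passage come from the Step 10 / Step 11 cells above
full_tokens = len(enc.encode(the_full_text))
chunk_tokens = len(enc.encode(top_passage))

savings = 1 - chunk_tokens / full_tokens
print(f"full article: {full_tokens} tokens")
print(f"best chunk:   {chunk_tokens} tokens")
print(f"prompt tokens saved by sending only the best chunk: {savings:.0%}")
```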
" + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/notebooks/genai_colab_lab_4.ipynb b/notebooks/genai_colab_lab_4.ipynb new file mode 100644 index 0000000..125acb3 --- /dev/null +++ b/notebooks/genai_colab_lab_4.ipynb @@ -0,0 +1,380 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": { + "id": "nFbQGw2POViM" + }, + "source": [ + "# Lab 4 - RAG" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "iSVyZRkvmqyc" + }, + "source": [ + "## Setup Environment\n", + "The following code loads the environment variables, images for the RAG App, and libraries required to run this notebook.\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "BwWfBNdUmqyd" + }, + "outputs": [], + "source": [ + "FILE=\"GenAI Lab 4\"\n", + "\n", + "# ! pip install -qqq git+https://github.com/elastic/notebook-workshop-loader.git@main\n", + "from notebookworkshoploader import loader\n", + "import os\n", + "from dotenv import load_dotenv\n", + "\n", + "if os.path.isfile(\"../env\"):\n", + " load_dotenv(\"../env\", override=True)\n", + " print('Successfully loaded environment variables from local env file')\n", + "else:\n", + " loader.load_remote_env(file=FILE, env_url=\"https://notebook-workshop-api-voldmqr2bq-uc.a.run.app\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "Ln-8SRvAI-jS" + }, + "outputs": [], + "source": [ + "# ! pip install -qqq tiktoken==0.5.2 cohere==4.38 openai==1.3.9\n", + "# ! pip install -qqq streamlit==1.30.0 elasticsearch==8.12.0 elastic-apm==6.20.0 inquirer==3.2.1 python-dotenv==1.0.0\n", + "# ! pip install -qqq elasticsearch-llm-cache==0.9.5\n", + "! 
echo \"github codespaces has pre-installed these libraries\"" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "E0uQujqZclf0" + }, + "source": [ + "## Labs\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "IvZYvYkE62Df" + }, + "source": [ + "### Lab 4.1 - Gathering Semantic documents from Elasticsearch\n", + "This first exercise will allow us to see an example of returing semantically matching documents from Elasticsearch.\n", + "\n", + "It is not too important to understand all the Elasticsearch DSL syntax at this stage.\n", + "\n" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "DsCwwEc95qv8" + }, + "source": [ + "#### Run the code block below to set up the query function\n", + "---\n", + "\n" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "l7lu2VBg6vMN" + }, + "outputs": [], + "source": [ + "import os\n", + "import openai\n", + "from elasticsearch import Elasticsearch\n", + "import time\n", + "import json\n", + "import textwrap\n", + "\n", + "\n", + "index = os.environ['ELASTIC_INDEX_DOCS_W']\n", + "\n", + "# Create Elasticsearch Connection\n", + "es = Elasticsearch(\n", + " cloud_id=os.environ['ELASTIC_CLOUD_ID_W'],\n", + " api_key=(os.environ['ELASTIC_APIKEY_ID_W']),\n", + " request_timeout=30\n", + " )\n", + "\n", + "\n", + "# Search Function\n", + "def es_hybrid_search(question):\n", + " query = {\n", + " \"nested\": {\n", + " \"path\": \"passages\",\n", + " \"query\": {\n", + " \"bool\": {\n", + " \"must\": [\n", + " {\n", + " \"match\": {\n", + " \"passages.text\": question\n", + " }\n", + " }\n", + " ]\n", + " }\n", + " }\n", + " }\n", + " }\n", + "\n", + " knn = {\n", + " \"inner_hits\": {\n", + " \"_source\": False,\n", + " \"fields\": [\n", + " \"passages.text\"\n", + " ]\n", + " },\n", + " \"field\": \"passages.embeddings\",\n", + " \"k\": 5,\n", + " \"num_candidates\": 100,\n", + " \"query_vector_builder\": {\n", + " \"text_embedding\": {\n", + " \"model_id\": \"sentence-transformers__all-distilroberta-v1\",\n", + " \"model_text\": question\n", + " }\n", + " }\n", + " }\n", + "\n", + " rank = {\n", + " \"rrf\": {}\n", + " }\n", + "\n", + " fields = [\n", + " \"title\",\n", + " \"text\"\n", + " ]\n", + "\n", + " size = 5\n", + "\n", + " resp = es.search(index=index,\n", + " #query=query,\n", + " knn=knn,\n", + " fields=fields,\n", + " size=size,\n", + " #rank=rank,\n", + " source=False\n", + " )\n", + "\n", + " title_text = []\n", + " for doc in resp['hits']['hits']:\n", + " title_text.append( { 'title' : doc['fields']['title'][0],\n", + " 'passage' : doc['inner_hits']['passages']['hits']['hits'][0]['fields']['passages'][0]['text'][0] }\n", + " )\n", + "\n", + " return title_text" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "eKBumt6W68wE" + }, + "source": [ + "#### Example Semantic Search With Elastic" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "h4hlknOP-Tba" + }, + "outputs": [], + "source": [ + "user_question = \"Who is Batman?\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "qpHyxzev4WZm" + }, + "outputs": [], + "source": [ + "es_augment_docs = es_hybrid_search(user_question)\n", + "\n", + "print('Wikipedia titles returned:\\n')\n", + "for hit, wiki in enumerate(es_augment_docs):\n", + " print(f\"{hit} - {wiki['title'] }\" )" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "dPVcfU_26rGI" + }, + "source": [ + "### Lab 4.2 - Sending Elasticsearch docs with a prompt 
for a RAG response" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "UZRE3N0q61L3" + }, + "source": [ + "#### Run the code below to set up the LLM Connection" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "aWeL5ANw65ND" + }, + "outputs": [], + "source": [ + "import openai\n", + "from openai import OpenAI\n", + "import textwrap\n", + "\n", + "\n", + "# Configure OpenAI client\n", + "openai.api_key = os.environ['OPENAI_API_KEY']\n", + "openai.api_base = os.environ['OPENAI_API_BASE']\n", + "openai.default_model = os.environ['OPENAI_API_ENGINE']\n", + "openai.verify_ssl_certs = False\n", + "client = OpenAI(api_key=openai.api_key, base_url=openai.api_base)\n", + "\n", + "if os.environ['ELASTIC_PROXY'] != \"True\":\n", + " openai.api_type = os.environ['OPENAI_API_TYPE']\n", + " openai.api_version = os.environ['OPENAI_API_VERSION']\n", + "\n", + "\n", + "# Text wrapper for colab readibility\n", + "def wrap_text(text):\n", + " wrapped_text = textwrap.wrap(text, 70)\n", + " return '\\n'.join(wrapped_text)\n", + "\n", + "\n", + "# Function to connect with LLM\n", + "def chat_gpt(client, question, passages):\n", + "\n", + " system_prompt=\"You are a helpful assistant who answers questions from provided Wikipedia articles.\"\n", + " user_prompt = f'''Answer the followng question: {question}\n", + " using only the wikipedia `passages` provided.\n", + " If the answer is not provided in the `passages` respond ONLY with:\n", + " \"I am unable to answer the user's question from the provided passage\" and nothing else.\n", + "\n", + " passages: {passages}\n", + "\n", + " AI response:\n", + " '''\n", + "\n", + " # Prepare the messages for the ChatGPT API\n", + " messages = [{\"role\": \"system\", \"content\": system_prompt},\n", + " {\"role\": \"user\", \"content\": user_prompt}]\n", + "\n", + " response = client.chat.completions.create(model=openai.default_model,\n", + " temperature=0.2,\n", + " messages=messages,\n", + " )\n", + " return response\n", + "# return response.choices[0].message.content" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "pQ4ZijSv65tQ" + }, + "source": [ + "#### Pass the full prompt and wiki passages to LLM" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "MR-XrChD6-E0" + }, + "outputs": [], + "source": [ + "ai = chat_gpt(client, user_question, es_augment_docs)\n", + "print(f\"User Question: \\n{user_question}\\n\")\n", + "print(\"AI response:\")\n", + "print(wrap_text(ai.choices[0].message.content))" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "t7RmurdZNPg-" + }, + "source": [ + "### Lab 4.3 - Full RAG Application with UI\n" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Main Script\n", + "We've placed the sample code in the streamlit folder of this repository\n", + "\n", + "Take a look at the code [streamlit/app.py](../streamlit/app.py)" + ] + }, + { + "cell_type": "markdown", + "metadata": { + "id": "Wu0KfS0ESf6e" + }, + "source": [ + "## Streamlit\n", + "To start the Streamlit app you need to use the ```streamlit run``` command from the folder. You can do this either from this notebook or the Visual Studio Code terminal provided in Github Codespaces" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": { + "id": "cHIHFID3NBXa" + }, + "outputs": [], + "source": [ + "! 
cd ../streamlit; streamlit run app.py " + ] + } + ], + "metadata": { + "colab": { + "provenance": [] + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.13" + } + }, + "nbformat": 4, + "nbformat_minor": 0 +} diff --git a/streamlit/.streamlit/config.toml b/streamlit/.streamlit/config.toml new file mode 100644 index 0000000..ca4e7d7 --- /dev/null +++ b/streamlit/.streamlit/config.toml @@ -0,0 +1,2 @@ +[theme] +base = "dark" \ No newline at end of file diff --git a/streamlit/app.py b/streamlit/app.py index 2fbaf8c..f7710a7 100644 --- a/streamlit/app.py +++ b/streamlit/app.py @@ -3,189 +3,334 @@ import openai from openai import OpenAI from elasticsearch import Elasticsearch -from string import Template import elasticapm +import base64 +from elasticsearch_llm_cache.elasticsearch_llm_cache import ElasticsearchLLMCache +import time +import json +import textwrap +###################################### +# Streamlit Configuration +st.set_page_config(layout="wide") + + +# wrap text when printing, because colab scrolls output to the right too much +def wrap_text(text, width): + wrapped_text = textwrap.wrap(text, width) + return '\n'.join(wrapped_text) + + +@st.cache_data() +def get_base64(bin_file): + with open(bin_file, 'rb') as f: + data = f.read() + return base64.b64encode(data).decode() + + +def set_background(png_file): + bin_str = get_base64(png_file) + page_bg_img = ''' + + ''' % bin_str + st.markdown(page_bg_img, unsafe_allow_html=True) + return + + +set_background('images/background-dark2.jpeg') + + +###################################### + +###################################### +# Sidebar Options +def sidebar_bg(side_bg): + side_bg_ext = 'png' + st.markdown( + f""" + + """, + unsafe_allow_html=True, + ) + + +side_bg = './images/sidebar2-dark.png' +sidebar_bg(side_bg) + +# sidebar logo +st.markdown( + """ + + """, unsafe_allow_html=True +) + +with st.sidebar: + st.image("images/elastic_logo_transp_100.png") + +###################################### +# expander markdown +st.markdown( + ''' + + ''', + unsafe_allow_html=True +) + +###################################### # Configure OpenAI client openai.api_key = os.environ['OPENAI_API_KEY'] openai.api_base = os.environ['OPENAI_API_BASE'] openai.default_model = os.environ['OPENAI_API_ENGINE'] openai.verify_ssl_certs = False +client = OpenAI(api_key=openai.api_key, base_url=openai.api_base) + +# Initialize Elasticsearch and APM clients # Configure APM and Elasticsearch clients @st.cache_resource def initElastic(): - os.environ['ELASTIC_APM_SERVICE_NAME'] = "genai_workshop_lab_2-2" + os.environ['ELASTIC_APM_SERVICE_NAME'] = "genai_workshop_v2_lab_2-2" apmclient = elasticapm.Client() elasticapm.instrument() - if 'ELASTIC_CLOUD_ID' in os.environ: - es = Elasticsearch( - cloud_id=os.environ['ELASTIC_CLOUD_ID'], - api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']), - request_timeout=30 - ) + if 'ELASTIC_CLOUD_ID_W' in os.environ: + es = Elasticsearch( + cloud_id=os.environ['ELASTIC_CLOUD_ID_W'], + api_key=(os.environ['ELASTIC_APIKEY_ID_W']), + request_timeout=30 + ) else: - es = Elasticsearch( - os.environ['ELASTIC_URL'], - api_key=(os.environ['ELASTIC_APIKEY_ID'], os.environ['ELASTIC_APIKEY_SECRET']), - request_timeout=30 - ) + es = Elasticsearch( + 
os.environ['ELASTIC_URL'], + basic_auth=(os.environ['ELASTIC_USER'], os.environ['ELASTIC_PASSWORD']), + request_timeout=30 + ) if os.environ['ELASTIC_PROXY'] != "True": openai.api_type = os.environ['OPENAI_API_TYPE'] openai.api_version = os.environ['OPENAI_API_VERSION'] return apmclient, es + + apmclient, es = initElastic() # Set our data index -index = os.environ['ELASTIC_INDEX_DOCS'] +index = os.environ['ELASTIC_INDEX_DOCS_W'] -# Run an Elasticsearch query using BM25 relevance scoring -@elasticapm.capture_span("bm25_search") -def search_bm25(query_text, es): - query = { - "match": { - "body_content": query_text - } - } +############################################################### +# Similarity Cache functions +# move to env if time +cache_index = "wikipedia-cache" - fields= [ - "title", - "url", - "position", - "body_content" - ] - collapse= { - "field": "title.enum" - } +def clear_es_cache(es): + print('clearing cache') + match_all_query = {"query": {"match_all": {}}} + clear_response = es.delete_by_query(index=cache_index, body=match_all_query) + return clear_response - resp = es.search(index=index, - query=query, - fields=fields, - collapse=collapse, - size=1, - source=False) - body = resp['hits']['hits'][0]['fields']['body_content'][0] - url = resp['hits']['hits'][0]['fields']['url'][0] +@elasticapm.capture_span("cache_search") +def cache_query(cache, prompt_text, similarity_threshold=0.5): + hit = cache.query(prompt_text=prompt_text, similarity_threshold=similarity_threshold) - return body, url + if hit: + st.sidebar.markdown('`Cache Match Found`') + else: + st.sidebar.markdown('`Cache Miss`') + + return hit + + +@elasticapm.capture_span("add_to_cache") +def add_to_cache(cache, prompt, response): + st.sidebar.markdown('`Adding response to cache`') + print('adding to cache') + print(prompt) + print(response) + resp = cache.add(prompt=prompt, response=response) + st.markdown(resp) + return resp + + +def init_elastic_cache(): + # Init Elasticsearch Cache + # Only want to attempt to create the index on first run + cache = ElasticsearchLLMCache(es_client=es, + index_name=cache_index, + create_index=False # setting only because of Streamlit behavior + ) + st.sidebar.markdown('`creating Elasticsearch Cache`') + + if "index_created" not in st.session_state: + + st.sidebar.markdown('`running create_index`') + cache.create_index(768) + + # Set the flag so it doesn't run every time + st.session_state.index_created = True + else: + st.sidebar.markdown('`index already created, skipping`') -# Run an Elasticsearch query using ELSER relevance scoring -@elasticapm.capture_span("elser_search") -def search_elser(query_text, es): - query = { - "text_expansion": { - "ml.inference.chunk_expanded.tokens": { - "model_id": ".elser_model_1", - "model_text": query_text + return cache + + +def calc_similarity(score, func_type='dot_product'): + if func_type == 'dot_product': + return (score + 1) / 2 + elif func_type == 'cosine': + return (1 + score) / 2 + elif func_type == 'l2_norm': + return 1 / (1 + score ^ 2) + else: + return score + + +############################################################### + + +def get_bm25_query(query_text, augment_method): + if augment_method == "Full Text": + return { + "match": { + "text": query_text + } } - } - } + elif augment_method == "Matching Chunk": + return { + "nested": { + "path": "passages", + "query": { + "bool": { + "must": [ + { + "match": { + "passages.text": query_text + } + } + ] + } + }, + "inner_hits": { + "_source": False, + "fields": [ + "passages.text" 
+ ] + } + } + } + + +# Run an Elasticsearch query using BM25 relevance scoring +@elasticapm.capture_span("bm25_search") +def search_bm25(query_text, + es, + size=1, + augment_method="Full Text", + use_hybrid=False # always false - use semantic opt for hybrid + ): fields = [ - "title", - "url", - "position", - "body_content" + "text", + "title", ] - collapse = { - "field": "title.enum" - } - resp = es.search(index=index, - query=query, + query=get_bm25_query(query_text, augment_method), fields=fields, - collapse=collapse, - size=1, + size=size, source=False) - - body = resp['hits']['hits'][0]['fields']['body_content'][0] - url = resp['hits']['hits'][0]['fields']['url'][0] + # print(resp) + body = resp + url = 'nothing' return body, url -# Run an Elasticsearch query using hybrid RRF scoring of KNN and BM25 + @elasticapm.capture_span("knn_search") -def search_knn(query_text, es): - query = { - "bool": { - "must": [{ - "match": { - "body_content": { - "query": query_text - } - } - }], - "filter": [{ - "term": { - "url_path_dir3": "elasticsearch" - } - }] - } - } +def search_knn(query_text, + es, + size=1, + augment_method="Full Text", + use_hybrid=False + ): + fields = [ + "title", + "text" + ] - knn = [ - { - "field": "chunk-vector", - "k": 10, - "num_candidates": 10, - "filter": { - "bool": { - "filter": [ - { - "range": { - "chunklength": { - "gte": 0 - } - } - }, - { - "term": { - "url_path_dir3": "elasticsearch" - } + knn = { + "inner_hits": { + "_source": False, + "fields": [ + "passages.text" + ] + }, + "field": "passages.embeddings", + "k": size, + "num_candidates": 100, + "query_vector_builder": { + "text_embedding": { + "model_id": "sentence-transformers__all-distilroberta-v1", + "model_text": query_text } - ] - } - }, - "query_vector_builder": { - "text_embedding": { - "model_id": "sentence-transformers__msmarco-minilm-l-12-v3", - "model_text": query_text } - } - }] - - rank = { - "rrf": { - } } - fields= [ - "title", - "url", - "position", - "url_path_dir3", - "body_content" - ] + rank = {"rrf": {}} if use_hybrid else None + + # need to get the bm25 query if we are using hybrid + if use_hybrid: + print('using hybrid with augment method %s' % augment_method) + query = get_bm25_query(query_text, augment_method) + print(query) + if augment_method == "Matching Chunk": + del query['nested']['inner_hits'] + else: + print('not using hybrid') + query = None + + print(query) + print(knn) resp = es.search(index=index, - query=query, knn=knn, - rank=rank, + query=query, fields=fields, - size=10, + size=size, + rank=rank, source=False) - body = resp['hits']['hits'][0]['fields']['body_content'][0] - url = resp['hits']['hits'][0]['fields']['url'][0] + return resp, None - return body, url def truncate_text(text, max_tokens): tokens = text.split() @@ -194,120 +339,317 @@ def truncate_text(text, max_tokens): return ' '.join(tokens[:max_tokens]) -# Generate a response from ChatGPT based on the given prompt -def chat_gpt(prompt, max_tokens=1024, max_context_tokens=4000, safety_margin=5, sys_content=None): - # Truncate the prompt content to fit within the model's context length - truncated_prompt = truncate_text(prompt, max_context_tokens - max_tokens - safety_margin) - - # Make the right OpenAI call depending on the API we're using - if(os.environ["ELASTIC_PROXY"] == "True"): - client = OpenAI(api_key=openai.api_key, base_url=openai.api_base) - response = client.chat.completions.create(model=openai.default_model, - temperature=0, - messages=[{"role": "system", "content": sys_content}, - {"role": 
"user", "content": truncated_prompt}] - ) - else: - client = OpenAI(api_key=openai.api_key, base_url=openai.api_base) - response = client.chat.completions.create(engine=openai.default_model, - temperature=0, - messages=[{"role": "system", "content": sys_content}, - {"role": "user", "content": truncated_prompt}] - ) - - - # APM: add metadata labels of data we want to capture - elasticapm.label(model = openai.default_model) - elasticapm.label(prompt = prompt) - elasticapm.label(total_tokens = response.usage.total_tokens) - elasticapm.label(prompt_tokens = response.usage.prompt_tokens) - elasticapm.label(response_tokens = response.usage.completion_tokens) - if 'USER_HASH' in os.environ: elasticapm.label(user = os.environ['USER_HASH']) - - return response.choices[0].message.content - -def toLLM(resp, url, usr_prompt, sys_prompt, neg_resp, show_prompt): - prompt_template = Template(usr_prompt) - prompt_formatted = prompt_template.substitute(query=query, resp=resp, negResponse=negResponse) - answer = chat_gpt(prompt_formatted, sys_content=sys_prompt) - - # Display response from LLM - st.header('Response from LLM') - st.markdown(answer.strip()) - - # We don't need to return a reference URL if it wasn't useful - if not negResponse in answer: - st.write(url) - - # Display full prompt if checkbox was selected +def build_text_obj(resp, aug_method): + + tobj = {} + + for hit in resp['hits']['hits']: + # tobj[hit['fields']['title'][0]] = [] + title = hit['fields']['title'][0] + tobj.setdefault(title, []) + + if aug_method == "Matching Chunk": + print('hit') + print(hit) + # tobj['passages'] = [] + for ihit in hit['inner_hits']['passages']['hits']['hits']: + tobj[title].append( + {'passage': ihit['fields']['passages'][0]['text'][0], + '_score': ihit['_score']} + ) + elif aug_method == "Full Text": + tobj[title].append( + hit['fields'] + ) + + return tobj + + +def generate_response(query, + es, + search_method, + custom_prompt, + negative_response, + show_prompt, size=1, + augment_method="Full Text", + use_hybrid=False, + show_es_response=True, + show_es_augment=True, + ): + + # Perform the search based on the specified method + search_functions = { + 'bm25': {'method': search_bm25, 'display': 'Lexical Search'}, + 'knn': {'method': search_knn, 'display': 'Semantic Search'} + } + search_func = search_functions.get(search_method)['method'] + if not search_func: + raise ValueError(f"Invalid search method: {search_method}") + + # Perform the search and format the docs + response, url = search_func(query, es, size, augment_method, use_hybrid) + es_time = time.time() + augment_text = build_text_obj(response, augment_method) + + res_col1, res_col2 = st.columns(2) + # Display the search results from ES + with res_col2: + st.header(':rainbow[Elasticsearch Response]') + st.subheader(':orange[Search Settings]') + st.write(':gray[Search Method:] :blue[%s]' % search_functions.get(search_method)['display']) + st.write(':gray[Size Setting:] :blue[%s]' % size) + st.write(':gray[Augment Setting:] :blue[%s]' % augment_method) + st.write(':gray[Using Hybrid:] :blue[%s]' % ( + 'Not Applicable with Lexical' if search_method == 'bm25' else use_hybrid)) + + st.subheader(':green[Augment Chunk(s) from Elasticsearch]') + if show_es_augment: + st.json(dict(augment_text)) + else: + st.write(':blue[Show Augment Disabled]') + + st.subheader(':violet[Elasticsearch Response]') + if show_es_response: + st.json(dict(response)) + else: + st.write(':blue[Response Received]') + + formatted_prompt = custom_prompt.replace("$query", 
query).replace("$response", str(augment_text)).replace( + "$negResponse", negative_response) + + with res_col1: + st.header(':orange[GenAI Response]') + + chat_response = chat_gpt(formatted_prompt, system_prompt="You are a helpful assistant.") + + # Display assistant response in chat message container + with st.chat_message("assistant"): + message_placeholder = st.empty() + full_response = "" + for chunk in chat_response.split(): + full_response += chunk + " " + time.sleep(0.02) + # Add a blinking cursor to simulate typing + message_placeholder.markdown(full_response + "▌") + message_placeholder.markdown(full_response) + + # Display results if show_prompt: - st.divider() - st.subheader('Full prompt sent to LLM') - prompt_formatted + st.text("Full prompt sent to ChatGPT:") + st.text(wrap_text(formatted_prompt, 70)) -# Prompt Defaults -prompt_default = """Answer this question: $query -Using only the information from this Elastic Doc: $resp -Format the answer in complete markdown code format -If the answer is not contained in the supplied doc reply '$negResponse' and nothing else""" + if negative_response not in chat_response: + pass + else: + chat_response = None -system_default = 'You are a helpful assistant.' -neg_default = "I'm unable to answer the question based on the information I have from Elastic Docs." + return es_time, chat_response -''' Main chat form -''' -st.title("ElasticDocs GPT") +def chat_gpt(user_prompt, system_prompt): + """ + Generates a response from ChatGPT based on the given user and system prompts. + """ + max_tokens = 1024 + max_context_tokens = 4000 + safety_margin = 5 -with st.form("chat_form"): + # Truncate the prompt content to fit within the model's context length + truncated_prompt = truncate_text(user_prompt, max_context_tokens - max_tokens - safety_margin) + + # Prepare the messages for the ChatGPT API + messages = [{"role": "system", "content": system_prompt}, + {"role": "user", "content": truncated_prompt}] - query = st.text_input("Ask the Elastic Documentation a question: ", placeholder='I want to secure my elastic cluster') + # Add APM metadata and return the response content + elasticapm.set_custom_context({'model': openai.default_model, 'prompt': user_prompt}) + # return response["choices"][0]["message"]["content"] - with st.expander("Show Prompt Override Inputs"): - # Inputs for system and User prompt override - sys_prompt = st.text_area("create an alernative system prompt", placeholder=system_default, value=system_default) - usr_prompt = st.text_area("create an alternative user prompt required -> \$query, \$resp, \$negResponse", - placeholder=prompt_default, value=prompt_default ) + full_response = "" + for response in client.chat.completions.create( + model=openai.default_model, + temperature=0, + messages=messages, + stream=True + ): + full_response += (response.choices[0].delta.content or "") - # Default Response when criteria are not met - negResponse = st.text_area("Create an alternative negative response", placeholder = neg_default, value=neg_default) + return full_response - show_full_prompt = st.checkbox('Show Full Prompt Sent to LLM') - # Query Submit Buttons - col1, col2, col3 = st.columns(3) +# Main chat form +st.title("Wikipedia RAG Demo Platform") + +# Define the default prompt and negative response +default_prompt_intro = "Answer this question:" +default_response_instructions = ("using only the information from the wikipedia documents included and nothing " + "else.\nwikipedia_docs: $response\n") +default_negative_response = ("If the 
+default_negative_response = ("If the answer is not provided in the included documentation, you are to ONLY reply with "
+                             "'I'm unable to answer the question based on the information I have from wikipedia' and "
+                             "nothing else.")
+
+with st.form("chat_form"):
+    query = st.text_input("Ask the Wikipedia demo a question:",
+                          placeholder='Who is Batman?')
+
+    opt_col1, opt_col2 = st.columns(2)
+    with opt_col1:
+        with st.expander("Customize Prompt Template"):
+            prompt_intro = st.text_area("Introduction/context of the prompt:", value=default_prompt_intro)
+            prompt_query_placeholder = st.text_area("Placeholder for the user's query:", value="$query")
+            prompt_response_placeholder = st.text_area("Placeholder for the Elasticsearch response:",
+                                                       value=default_response_instructions)
+            prompt_negative_response = st.text_area("Negative response placeholder:", value=default_negative_response)
+            prompt_closing = st.text_area("Closing remarks of the prompt:",
+                                          value="Format the answer in complete markdown code format.")
+
+            combined_prompt = f"{prompt_intro}\n{prompt_query_placeholder}\n{prompt_response_placeholder}\n{prompt_negative_response}\n{prompt_closing}"
+            st.text_area("Preview of your custom prompt:", value=combined_prompt, disabled=True)
+
+    with opt_col2:
+        with st.expander("Retrieval Search and Display Options"):
+            st.subheader("Retrieval Options")
+            ret_1, ret_2 = st.columns(2)
+            with ret_1:
+                search_method = st.radio("Search Method", ("Semantic Search", "Lexical Search"))
+                augment_method = st.radio("Augment Method", ("Full Text", "Matching Chunk"))
+            with ret_2:
+                # TODO this should update the title based on the augment_method
+                doc_count_title = "Number of docs or chunks to Augment with" if augment_method == "Full Text" else "Number of Matching Chunks to Retrieve"
+                doc_count = st.slider(doc_count_title, min_value=1, max_value=5, value=1)
+
+            use_hybrid = st.checkbox('Use Hybrid Search')
+
+            st.divider()
+
+            st.subheader("Display Options")
+            show_es_augment = st.checkbox('Show Elasticsearch Augment Text', value=True)
+            show_es_response = st.checkbox('Show Elasticsearch Response', value=True)
+            show_full_prompt = st.checkbox('Show Full Prompt Sent to LLM')
+
+            st.divider()
+
+            st.subheader("Caching Options")
+            cache_1, cache_2 = st.columns(2)
+            with cache_1:
+                use_cache = st.checkbox('Use Similarity Cache')
+                # Slider for adjusting similarity threshold
+                similarity_threshold_selection = st.slider(
+                    "Select Similarity Threshold (dot_product: higher means more similar)",
+                    min_value=0.0, max_value=2.0,
+                    value=0.5, step=0.01)
+
+            with cache_2:
+                clear_cache_butt = st.form_submit_button(':red[Clear Similarity Cache]')
+
+    col1, col2 = st.columns(2)
     with col1:
-        bm25_button = st.form_submit_button("Use BM25")
-    with col2:
-        knn_button = st.form_submit_button("Use kNN")
-    with col3:
-        elser_button = st.form_submit_button("Use ELSER")
+        answer_button = st.form_submit_button("Find my answer!")
-if elser_button:
-    apmclient.begin_transaction("query")
-    elasticapm.label(search_method = "elser")
-    elasticapm.label(query = query)
+# Clear Cache Button
+if clear_cache_butt:
+    st.session_state.clear_cache_clicked = True
-    resp, url = search_elser(query, es) # run ELSER query
-    toLLM(resp, url, usr_prompt, sys_prompt, negResponse, show_full_prompt)
+# Confirmation step
+if st.session_state.get("clear_cache_clicked", False):
+    apmclient.begin_transaction("clear_cache")
+    elasticapm.label(action="clear_cache")
-    apmclient.end_transaction("query", "success")
-if knn_button:
-    apmclient.begin_transaction("query")
-    elasticapm.label(search_method = "knn")
-    elasticapm.label(query = query)
+    # Start timing
+    start_time = time.time()
-    resp, url = search_knn(query, es) # run kNN hybrid query
-    toLLM(resp, url, usr_prompt, sys_prompt, negResponse, show_full_prompt)
+    if st.button(":red[Confirm Clear Cache]"):
+        # TODO if index doesn't exist, catch exception then create it
+        response = clear_es_cache(es)
+        st.success("Cache cleared successfully!", icon="🤯")
+        st.session_state.clear_cache_clicked = False  # Reset the state
-    apmclient.end_transaction("query", "success")
-if bm25_button:
-    apmclient.begin_transaction("query")
-    elasticapm.label(search_method = "bm25")
-    elasticapm.label(query = query)
+    apmclient.end_transaction("clear_cache", "success")
-    resp, url = search_bm25(query, es) # run kNN hybrid query
-    toLLM(resp, url, usr_prompt, sys_prompt, negResponse, show_full_prompt)
+if answer_button:
+    search_method = "knn" if search_method == "Semantic Search" else "bm25"
-    apmclient.end_transaction("query", "success")
\ No newline at end of file
+    apmclient.begin_transaction("query")
+    elasticapm.label(search_method=search_method)
+    elasticapm.label(query=query)
+
+    # Start timing
+    start_time = time.time()
+
+    if use_cache:
+        cache = init_elastic_cache()
+
+        # check the llm cache first
+        st.sidebar.markdown('`Checking ES Cache`')
+        cache_check = cache_query(cache,
+                                  prompt_text=query,
+                                  similarity_threshold=similarity_threshold_selection
+                                  )
+        # st.markdown(cache_check)
+    else:
+        cache_check = None
+        st.sidebar.markdown('`Skipping ES Cache`')
+
+    try:
+
+        if cache_check:
+            es_time = time.time()
+            st.sidebar.markdown('`cache match, using cached results`')
+            st.subheader('Response from Cache')
+            s_score = calc_similarity(cache_check['_score'], func_type='dot_product')
+            st.code(f"Similarity Value: {s_score:.5f}")
+
+            # Display response from LLM
+            st.header('LLM Response')
+            # st.markdown(cache_check['response'][0])
+            with st.chat_message("assistant"):
+                message_placeholder = st.empty()
+                full_response = ""
+                for chunk in cache_check['response'][0].split():
+                    full_response += chunk + " "
+                    time.sleep(0.02)
+                    # Add a blinking cursor to simulate typing
+                    message_placeholder.markdown(full_response + "▌")
+                message_placeholder.markdown(full_response)
+
+            llmAnswer = None  # no need to recache the answer
+
+        else:
+            # Use combined_prompt and show_full_prompt as arguments
+            es_time, llmAnswer = generate_response(query,
+                                                   es,
+                                                   search_method,
+                                                   combined_prompt,
+                                                   prompt_negative_response,
+                                                   show_full_prompt,
+                                                   doc_count,
+                                                   augment_method,
+                                                   use_hybrid,
+                                                   show_es_response,
+                                                   show_es_augment,
+                                                   )
+        apmclient.end_transaction("query", "success")
+
+        if use_cache and llmAnswer:
+            if "I'm unable to answer the question" in llmAnswer:
+                st.sidebar.markdown('`unable to answer, not adding to cache`')
+            else:
+                st.sidebar.markdown('`adding prompt and response to cache`')
+                add_to_cache(cache, query, llmAnswer)
+
+        # End timing and print the elapsed time
+        elapsed_time = time.time() - start_time
+        es_elapsed_time = es_time - start_time
+
+        ct1, ct2 = st.columns(2)
+        with ct1:
+            st.subheader("GenAI Time taken: :red[%.2f seconds]" % elapsed_time)
+
+        with ct2:
+            st.subheader("ES Query Time taken: :green[%.2f seconds]" % es_elapsed_time)
+
+    except Exception as e:
+        st.error(f"An error occurred: {str(e)}")
+        apmclient.end_transaction("query", "failure")
diff --git a/streamlit/images/background-dark2.jpeg b/streamlit/images/background-dark2.jpeg
new file mode 100644
index 0000000..ba66ebd
Binary files /dev/null and b/streamlit/images/background-dark2.jpeg differ
diff --git a/streamlit/images/elastic_logo_transp_100.png b/streamlit/images/elastic_logo_transp_100.png
new file mode 100644
index 0000000..91c902b
Binary files /dev/null and b/streamlit/images/elastic_logo_transp_100.png differ
diff --git a/streamlit/images/sidebar2-dark.png b/streamlit/images/sidebar2-dark.png
new file mode 100644
index 0000000..f48b0cd
Binary files /dev/null and b/streamlit/images/sidebar2-dark.png differ
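
Note: the app code in this diff calls truncate_text() and wrap_text(), which are defined outside the hunks shown here. The sketch below is one plausible implementation, assuming tiktoken's cl100k_base encoding for token counting and the standard-library textwrap module for display wrapping; the function bodies, encoding choice, and defaults are illustrative rather than the repository's actual helpers.

import textwrap

import tiktoken


def truncate_text(text, max_tokens):
    # Keep at most max_tokens tokens so the prompt fits in the model's context window.
    encoding = tiktoken.get_encoding("cl100k_base")  # assumed encoding; the real helper may differ
    tokens = encoding.encode(text)
    if len(tokens) <= max_tokens:
        return text
    return encoding.decode(tokens[:max_tokens])


def wrap_text(text, width=70):
    # Hard-wrap each line so the full prompt displays readably in st.text().
    return "\n".join(textwrap.fill(line, width=width) for line in text.splitlines())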