diff --git a/colabs/gemini/How_to_use_Gemini_Pro_API_with_WB_Weave.ipynb b/colabs/gemini/How_to_use_Gemini_Pro_API_with_WB_Weave.ipynb
new file mode 100644
index 00000000..18acbe5e
--- /dev/null
+++ b/colabs/gemini/How_to_use_Gemini_Pro_API_with_WB_Weave.ipynb
@@ -0,0 +1,526 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "c5eb1db5-a89e-4bba-8a4c-078b647be1b7",
+ "metadata": {},
+ "source": [
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "_7u7Ft1-GXrK",
+ "metadata": {
+ "id": "_7u7Ft1-GXrK"
+ },
+ "source": [
+ "# How to use Gemini Pro API with W&B Weave\n",
+ "\n",
+ "Read [our article](https://wandb.ai/prompt-eng/gemini-weave/reports/How-to-use-Gemini-Pro-API-with-W-B-Weave--Vmlldzo3NzEwNTA1) and follow along in this colab."
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "Nk3L-jqbGsrN",
+ "metadata": {
+ "id": "Nk3L-jqbGsrN"
+ },
+ "source": [
+ "## Installation"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4fdc0c0e-bc69-41ed-9b09-ec469a3a781b",
+ "metadata": {
+ "id": "4fdc0c0e-bc69-41ed-9b09-ec469a3a781b"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!pip install google-generativeai weave -qqU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fiI5pF6WGyd_",
+ "metadata": {
+ "id": "fiI5pF6WGyd_"
+ },
+ "outputs": [],
+ "source": [
+ "import google.generativeai as genai\n",
+ "import weave"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "Avj5TxGjFhE9",
+ "metadata": {
+ "id": "Avj5TxGjFhE9"
+ },
+ "source": [
+ "## Set up your Google API key and log into W&B Weave\n",
+ "\n",
+ "To run the following cell, your API key must be stored it in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see the [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) quickstart for an example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d0Lv-deDFgLl",
+ "metadata": {
+ "id": "d0Lv-deDFgLl"
+ },
+ "outputs": [],
+ "source": [
+ "from google.colab import userdata\n",
+ "GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')\n",
+ "genai.configure(api_key=GOOGLE_API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "894d8781-bea4-4f74-b1cd-b893f91bfee6",
+ "metadata": {
+ "id": "894d8781-bea4-4f74-b1cd-b893f91bfee6"
+ },
+ "outputs": [],
+ "source": [
+ "weave.init('prompt-eng/gemini-weave')"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "wFiQm3JAJpvw",
+ "metadata": {
+ "id": "wFiQm3JAJpvw"
+ },
+ "source": [
+ "## Generate a summary and track it in Weave"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "82d096fc-309a-4c57-a3a8-926d88a520d3",
+ "metadata": {
+ "id": "82d096fc-309a-4c57-a3a8-926d88a520d3"
+ },
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!wget https://raw.githubusercontent.com/wandb/llm-workshop-fc2024/main/part_2_structured_outputs/longpaper.txt\n",
+ "with open('longpaper.txt', 'r') as file:\n",
+ " long_paper_text = file.read()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3343fcb9-b96d-478d-a4a8-fbb1e7b27a34",
+ "metadata": {
+ "id": "3343fcb9-b96d-478d-a4a8-fbb1e7b27a34"
+ },
+ "outputs": [],
+ "source": [
+ "model_info = genai.get_model('models/gemini-1.5-pro-latest')\n",
+ "print(model_info.input_token_limit)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "03216cd4-9fca-4dcb-aa73-b250cda7ef26",
+ "metadata": {
+ "id": "03216cd4-9fca-4dcb-aa73-b250cda7ef26"
+ },
+ "outputs": [],
+ "source": [
+ "model = genai.GenerativeModel('models/gemini-1.5-pro-latest')\n",
+ "model.count_tokens(long_paper_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba32823a-f4f1-4233-8389-590e8757e85e",
+ "metadata": {
+ "id": "ba32823a-f4f1-4233-8389-590e8757e85e"
+ },
+ "outputs": [],
+ "source": [
+ "@weave.op()\n",
+ "def generate_summary(text):\n",
+ " prompt = \"Generate a concise summary of below text:\\n\"\n",
+ " response = model.generate_content(prompt + long_paper_text)\n",
+ " return {\n",
+ " 'summary': response.text\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9128199a-e13f-4d7e-b26c-55267ab8e161",
+ "metadata": {
+ "id": "9128199a-e13f-4d7e-b26c-55267ab8e161"
+ },
+ "outputs": [],
+ "source": [
+ "summary = generate_summary(long_paper_text)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "ur0vXGNtJ5IF",
+ "metadata": {
+ "id": "ur0vXGNtJ5IF"
+ },
+ "source": [
+ "## Gemini API JSON Mode"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4de6d4b7-f567-4070-b9d5-1867b72fc045",
+ "metadata": {
+ "id": "4de6d4b7-f567-4070-b9d5-1867b72fc045"
+ },
+ "outputs": [],
+ "source": [
+ "model = genai.GenerativeModel(\"gemini-1.5-pro-latest\",\n",
+ " generation_config={\"response_mime_type\": \"application/json\"})"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c403ccf8-58fd-4e8a-b951-91e7aa39022c",
+ "metadata": {
+ "id": "c403ccf8-58fd-4e8a-b951-91e7aa39022c"
+ },
+ "outputs": [],
+ "source": [
+ "from pydantic import BaseModel, Field\n",
+ "\n",
+ "class Summary(BaseModel):\n",
+ " title: str\n",
+ " summary: str = Field(description=\"plain short text summary without markdown\")\n",
+ "\n",
+ "schema = Summary.model_json_schema()\n",
+ "schema"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9a49b8de-e40d-4dc5-8e79-97af5efb1797",
+ "metadata": {
+ "id": "9a49b8de-e40d-4dc5-8e79-97af5efb1797"
+ },
+ "outputs": [],
+ "source": [
+ "import json"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "10b19a45-c4a9-47be-abda-97607f0e4431",
+ "metadata": {
+ "id": "10b19a45-c4a9-47be-abda-97607f0e4431"
+ },
+ "outputs": [],
+ "source": [
+ "@weave.op()\n",
+ "def create_prompt(text, schema):\n",
+ " prompt = f\"\"\"Generate a concise summary of below text using below JSON schema.\n",
+ "Please output plain text without markdown and limit it to 200 words.\n",
+ "Text:\n",
+ "{text}\n",
+ "JSON schema:\n",
+ "{schema}\n",
+ "\"\"\"\n",
+ " return prompt\n",
+ "\n",
+ "\n",
+ "@weave.op()\n",
+ "def generate_summary(text, schema):\n",
+ " prompt = create_prompt(text, schema)\n",
+ " response = model.generate_content(prompt)\n",
+ " try:\n",
+ " output = json.loads(response.text)\n",
+ " except:\n",
+ " output = response.text\n",
+ " return {\n",
+ " 'summary': output\n",
+ " }"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "b8b03f96-f04b-44b6-b03c-0065243408d3",
+ "metadata": {
+ "id": "b8b03f96-f04b-44b6-b03c-0065243408d3"
+ },
+ "outputs": [],
+ "source": [
+ "new_summary = generate_summary(long_paper_text, schema)"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "EFuMHhC5J8NO",
+ "metadata": {
+ "id": "EFuMHhC5J8NO"
+ },
+ "source": [
+ "## Evaluation with Weave"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "65abebe7-a0f4-4a7a-bdae-888d67e9403f",
+ "metadata": {
+ "id": "65abebe7-a0f4-4a7a-bdae-888d67e9403f"
+ },
+ "outputs": [],
+ "source": [
+ "from pydantic import model_validator\n",
+ "import os\n",
+ "import time\n",
+ "\n",
+ "os.environ['WEAVE_PARALLELISM'] = '1' # remove parallelism due to our Gemini quota, remove it if not needed\n",
+ "\n",
+ "\n",
+ "class SummaryModel(weave.Model):\n",
+ " model_name: str\n",
+ " prompt_template: str\n",
+ " json_schema: dict\n",
+ " model: genai.GenerativeModel\n",
+ "\n",
+ " @model_validator(mode=\"before\")\n",
+ " def create_model(cls, v):\n",
+ " model_name = v[\"model_name\"]\n",
+ " model = genai.GenerativeModel(model_name,\n",
+ " generation_config={\"response_mime_type\": \"application/json\"})\n",
+ " v[\"model\"] = model\n",
+ " return v\n",
+ "\n",
+ " @weave.op()\n",
+ " async def predict(self, text: str) -> dict:\n",
+ " time.sleep(15) # remove if your Gemini quota allows for it :)\n",
+ " prompt = self.prompt_template.format(text=text, schema=self.schema)\n",
+ " response = self.model.generate_content(prompt)\n",
+ " try:\n",
+ " output = json.loads(response.text)\n",
+ " return output[0]\n",
+ " except:\n",
+ " return {'summary': response.text}\n",
+ "\n"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "4b8343cb-4a54-4fea-b2b0-b2c21b39e061",
+ "metadata": {
+ "id": "4b8343cb-4a54-4fea-b2b0-b2c21b39e061"
+ },
+ "outputs": [],
+ "source": [
+ "prompt_template = \"\"\"Generate a concise summary of below text using below JSON schema.\n",
+ "Please output plain text without markdown and limit it to 200 words.\n",
+ "Text:\n",
+ "{text}\n",
+ "JSON schema:\n",
+ "{schema}\n",
+ "\"\"\""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "d69ed0c0-e032-4ced-8fd8-919ee332a95f",
+ "metadata": {
+ "id": "d69ed0c0-e032-4ced-8fd8-919ee332a95f"
+ },
+ "outputs": [],
+ "source": [
+ "model = SummaryModel(model_name='gemini-1.5-pro-latest', prompt_template=prompt_template, \\\n",
+ " json_schema=schema)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "3d78ca87-bd61-45e6-b62c-0b82754d2b79",
+ "metadata": {
+ "id": "3d78ca87-bd61-45e6-b62c-0b82754d2b79"
+ },
+ "outputs": [],
+ "source": [
+ "await model.predict(long_paper_text)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "dca5e402-0805-4888-8f08-0b28748fea11",
+ "metadata": {
+ "id": "dca5e402-0805-4888-8f08-0b28748fea11"
+ },
+ "outputs": [],
+ "source": [
+ "dataset_uri = \"weave:///prompt-eng/gemini-weave/object/long_papers:9N9vkE4XY1SYoXLbvbCtP0YKqyqXErilG4XW8jYmQgE\"\n",
+ "dataset = weave.ref(dataset_uri).get()"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "de2f8804-dfd7-49b1-adbe-0a3dbb0c5ff1",
+ "metadata": {
+ "id": "de2f8804-dfd7-49b1-adbe-0a3dbb0c5ff1"
+ },
+ "outputs": [],
+ "source": [
+ "# Scoring function checking format adherence\n",
+ "@weave.op()\n",
+ "def check_formatting(model_output: dict) -> dict:\n",
+ " # Check if length is smaller than threshold\n",
+ " result = False\n",
+ " if type(model_output) == list:\n",
+ " model_output = model_output[0]\n",
+ " if type(model_output) == dict:\n",
+ " if 'summary' in model_output.keys():\n",
+ " if type(model_output['summary']) == str:\n",
+ " result = True\n",
+ " return {'formatting': result}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "yfF8_ZsRMhYe",
+ "metadata": {
+ "id": "yfF8_ZsRMhYe"
+ },
+ "outputs": [],
+ "source": [
+ "# Scoring function checking length of summary\n",
+ "@weave.op()\n",
+ "def check_conciseness(model_output: dict) -> dict:\n",
+ " # Check if length is smaller than threshold\n",
+ " result = False\n",
+ " if type(model_output) == list:\n",
+ " model_output = model_output[0]\n",
+ " if type(model_output) == dict:\n",
+ " if 'summary' in model_output.keys():\n",
+ " summary = model_output['summary']\n",
+ " if type(summary) == str:\n",
+ " result = len(summary.split()) < 300\n",
+ " return {'conciseness': result}"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "1d1c6bdb-83f9-4a63-99c4-0b2f0d3e9139",
+ "metadata": {
+ "id": "1d1c6bdb-83f9-4a63-99c4-0b2f0d3e9139"
+ },
+ "outputs": [],
+ "source": [
+ "evaluation = weave.Evaluation(\n",
+ " dataset=dataset, scorers=[check_formatting, check_conciseness],\n",
+ ")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "fa5b2822-55cf-4983-a52f-10314ca79c0a",
+ "metadata": {
+ "id": "fa5b2822-55cf-4983-a52f-10314ca79c0a"
+ },
+ "outputs": [],
+ "source": [
+ "await evaluation.evaluate(model)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "ba244582",
+ "metadata": {
+ "id": "ba244582"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c39dc979",
+ "metadata": {
+ "id": "c39dc979"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "8d2f8124",
+ "metadata": {
+ "id": "8d2f8124"
+ },
+ "outputs": [],
+ "source": []
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "f3012c72",
+ "metadata": {
+ "id": "f3012c72"
+ },
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "colab": {
+ "provenance": []
+ },
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}
diff --git a/colabs/gemini/README.md b/colabs/gemini/README.md
new file mode 100644
index 00000000..5504b8fe
--- /dev/null
+++ b/colabs/gemini/README.md
@@ -0,0 +1,6 @@
+# W&B Weave and Google Gemini Pro
+
+1) Start by checking out our [intro article](https://wandb.ai/prompt-eng/gemini-weave/reports/How-to-use-the-Gemini-Pro-API-with-W-B-Weave--Vmlldzo3NzEwNTA1)
+2) Follow along using the [colab](How_to_use_Gemini_Pro_API_with_WB_Weave.ipynb)
+3) See the [video demo](https://www.youtube.com/watch?v=yzgDXMb8L8E)
+4) Experiment with our [RAG example](google-demo-rag.ipynb)
\ No newline at end of file
diff --git a/colabs/gemini/google-demo-rag.ipynb b/colabs/gemini/google-demo-rag.ipynb
new file mode 100644
index 00000000..13b20c49
--- /dev/null
+++ b/colabs/gemini/google-demo-rag.ipynb
@@ -0,0 +1,355 @@
+{
+ "cells": [
+ {
+ "cell_type": "markdown",
+ "id": "18e2cdd1-0663-45fd-b5cd-63eee509fd79",
+ "metadata": {},
+ "source": [
+ "\n",
+ ""
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e7be1d7d-6953-4a88-8259-def2b0799934",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "%%capture\n",
+ "!pip install google-generativeai weave -qqU"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "937452d8-c1a6-4bcd-aeee-0bf680a64902",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import google.generativeai as genai\n",
+ "import weave"
+ ]
+ },
+ {
+ "cell_type": "markdown",
+ "id": "c05ed102-8d6c-404f-83a5-00c5dc645cc4",
+ "metadata": {},
+ "source": [
+ "## Set up your Google API key and log into W&B Weave\n",
+ "\n",
+ "To run the following cell, your API key must be stored it in a Colab Secret named `GOOGLE_API_KEY`. If you don't already have an API key, or you're not sure how to create a Colab Secret, see the [Authentication](https://github.com/google-gemini/cookbook/blob/main/quickstarts/Authentication.ipynb) quickstart for an example."
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "c2d877e9-1196-4664-ab74-628b7dfbf55d",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from google.colab import userdata\n",
+ "GOOGLE_API_KEY=userdata.get('GOOGLE_API_KEY')\n",
+ "genai.configure(api_key=GOOGLE_API_KEY)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "be15ff1b-7ed3-44de-b8a8-60ab84d5d995",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "import asyncio\n",
+ "from weave import Model, Evaluation, Dataset\n",
+ "import numpy as np"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9caf7340-f29a-45a6-81e4-9960ace2ca87",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# We call init to begin capturing data in the project, intro-example.\n",
+ "weave.init(\"prompt-eng/gemini-rag\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "5d8a4b51-9435-4f0b-b680-fc07a4d31e76",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "from google.api_core import retry\n",
+ "emb_model = 'models/embedding-001'\n",
+ "\n",
+ "def make_embed_text_fn(model):\n",
+ " @retry.Retry(timeout=300.0)\n",
+ " def embed_fn(text: str) -> list[float]:\n",
+ " embedding = genai.embed_content(model=model,\n",
+ " content=text,\n",
+ " task_type=\"retrieval_document\")['embedding']\n",
+ " return np.array(embedding)\n",
+ " return embed_fn"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "7a8a724c-e532-4e38-aac3-7560a7fc9298",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@weave.op()\n",
+ "def docs_to_embeddings(docs: list) -> list:\n",
+ " # Convert documents to embeddings\n",
+ " document_embeddings = []\n",
+ " emb_fn = make_embed_text_fn(emb_model)\n",
+ " for doc in docs:\n",
+ " emb = emb_fn(doc)\n",
+ " document_embeddings.append(emb)\n",
+ " return document_embeddings"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "76a4383d-05b2-4532-b193-3d230e19d827",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "@weave.op()\n",
+ "def get_most_relevant_document(query, docs, document_embeddings):\n",
+ " # Convert query to embedding\n",
+ " query_embedding = genai.embed_content(model=emb_model,\n",
+ " content=query,\n",
+ " task_type=\"retrieval_query\")['embedding']\n",
+ " # Compute cosine similarity\n",
+ " similarities = np.dot(np.stack(document_embeddings), query_embedding)\n",
+ " # Get the index of the most similar document\n",
+ " most_relevant_doc_index = np.argmax(similarities)\n",
+ " return docs[most_relevant_doc_index]"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "93e58db5-5476-4dbe-9cc6-9b2823effd1e",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "# define the Morgan Stanley Research RAG Model\n",
+ "class MSResearchRAGModel(Model):\n",
+ " system_message: str\n",
+ " model_name: str = 'models/gemini-pro'\n",
+ "\n",
+ " @weave.op()\n",
+ " def predict(self, question: str, docs: list, add_context: bool) -> dict:\n",
+ " model = genai.GenerativeModel(self.model_name)\n",
+ "\n",
+ " RAG_Context = \"\"\n",
+ " # Retrieve the embeddings artifact\n",
+ " embeddings = weave.ref(\"MSRAG_Embeddings\").get()\n",
+ "\n",
+ " if add_context:\n",
+ " # Using Google Embeddings, get the relevant document for context\n",
+ " RAG_Context = get_most_relevant_document(question, docs, embeddings)\n",
+ "\n",
+ " query = f\"\"\"Use the following information to answer the subsequent question. If the answer cannot be found, write \"I don't know.\"\n",
+ "\n",
+ " Context from Morgan Stanley Research:\n",
+ " \\\"\\\"\\\"\n",
+ " {RAG_Context}\n",
+ " \\\"\\\"\\\"\n",
+ "\n",
+ " Question: {question}\"\"\"\n",
+ " prompt = f'{self.system_message}\\n\\n{query}'\n",
+ " response = model.generate_content(prompt) \n",
+ " model_output = response.text\n",
+ " \n",
+ " return model_output"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "e79a1a70-0845-4230-af62-f16aa260f7ed",
+ "metadata": {},
+ "outputs": [],
+ "source": [
+ "model = MSResearchRAGModel(\n",
+ " system_message=\"You are an expert in finance and answer questions related to finance, financial services, and financial markets. When responding based on provided information, be sure to cite the source.\"\n",
+ " )\n",
+ "\n",
+ "contexts = [\n",
+ " f\"\"\"Morgan Stanley has moved in new market managers on the East and West Coasts as part of changes that sent some of other management veterans into new roles, according to two sources.\n",
+ "\n",
+ " On the West Coast, Ken Sullivan, a 37-year-industry veteran who came to Morgan Stanley five years ago from RBC Wealth Management, has assumed an expanded role as market executive for a consolidated Beverly Hills and Los Angeles market, according to a source.\n",
+ "\n",
+ " Meanwhile, Greg Laetsch, a 44-year industry veteran who had been the complex manager in Los Angeles for the last 15 years, has moved to a non-producing senior advisor role to the LA market for Morgan Stanley, according to the same source.\n",
+ "\n",
+ " On the East Coast, Morgan Stanley hired Nikolas Totaro, a 19-year industry veteran, from Merrill Lynch, where he had worked for 14 years and had been most recently a market executive in Greenwich, Connecticut. Totaro will be a market manager reporting to John Palazzetti in the Midtown Wealth Management Center in Manhattan, according to the same source.\n",
+ "\n",
+ " Totaro is replacing Bill DeMatteo, a 21-year industry veteran who spent the last 14 years at Morgan Stanley, and who has returned to full-time production. DeMatteo has joined the Continuum Group at Morgan Stanley, which Barron’s ranked 20th among on its 2022 Top 100 Private Wealth Management Teams and listed as managing $7.2 billion in client assets.\n",
+ "\n",
+ " “His extensive 17 years of management experience at Morgan Stanley will be instrumental in shaping our approach to wealth management, fostering client relationships, and steering the team towards sustained growth and success,” Scott Siegel, leader of the Continuum Group, wrote on LinkedIn.\n",
+ "\n",
+ " Totaro and Laestch did not respond immediately to requests for comments sent through LinkedIn. Sullivan did not respond immediately to an emailed request. Both Morgan Stanley and Merrill spokespersons declined to comment about the changes.\n",
+ "\n",
+ " Totaro’s former Southern Connecticut market at Merrill included over 325 advisors and support staff across six offices in Greenwich, Stamford, Darien, Westport, Fairfield, and New Canaan, according to his LinkedIn profile.\n",
+ "\n",
+ " Separately, a former Raymond James Financial divisional director has joined Janney Montgomery Scott in Ponte Vedra Beach, Florida. Tom M. Galvin, who has spent the last 25 years with Raymond James, joins Janney as a complex director, according to an announcement.\n",
+ "\n",
+ " Galvin had most recently worked as a divisional director for Raymond James & Associates’ Southern Division. The firm consolidated the territory as part of a reorganization that took effect December 1. Galvin’s registration with Raymond James ended November 8, according to BrokerCheck.\n",
+ "\n",
+ " During his career, Galvin has held a range of branch and complex management roles in the North Atlantic and along the East Coast, according to his LinkedIn profile.\n",
+ "\n",
+ " “We’re looking forward to his experience and strong industry relationships as we continue to expand our team and geographic footprint,” stated Janney’s Florida Regional Director Frank Amigo, who joined from Raymond James in 2017.\n",
+ "\n",
+ " Galvin started his career in 1995 with RBC predecessor firm J. B. Hanauer & Co. and joined Raymond James two years later, according to BrokerCheck. He did not immediately respond to a request for comment sent through social media.\"\"\",\n",
+ " f\"\"\"Don’t Count on a March Rate Cut - Raise Rates\n",
+ " Inflation will be stickier than expected, delaying the start of long-awaited interest rate cuts.\n",
+ " Investors expecting a rate cut in March may be disappointed.\n",
+ " Six-month core consumer price inflation is likely to increase in the first quarter, prompting the Fed to watch and wait.\n",
+ " Unless there is an unexpectedly sharp economic downturn or weakening in the labor market, rate cuts are more likely to begin in June.\n",
+ "\n",
+ " Investors betting that the U.S. Federal Reserve will begin trimming interest rates in the first quarter of 2024 may be in for a disappointment.\n",
+ "\n",
+ " After the Fed’s December meeting, market expectations for a March rate cut jumped to surprising heights. Markets are currently putting a 75% chance, approximately, on rate cuts beginning in March. However, Morgan Stanley Research forecasts indicate that cuts are unlikely to come before June.\n",
+ "\n",
+ " Central bank policymakers have likewise pushed back on investors’ expectations. As Federal Reserve Chairman Jerome Powell said in December 2023, when it comes to inflation, “No one is declaring victory. That would be premature.”1\n",
+ "\n",
+ " Here’s why we still expect that rates are likely to hold steady until the middle of 2024.\n",
+ "\n",
+ " Inflation Outlook\n",
+ " A renewed uptick in core consumer prices is likely in the first quarter, as prices for services remain elevated, led by healthcare, housing and car insurance. Additionally, in monitoring inflation, the Fed will be watching the six-month average—which means that weaker inflation numbers from summer 2023 will drop out of the comparison window. Although annual inflation rates should continue to decline, the six-month gauge could nudge higher, to 2.4% in January and 2.69% in February.\n",
+ "\n",
+ " Labor markets have also proven resilient, giving Fed policymakers room to watch and wait.\n",
+ "\n",
+ " Data-Driven Expectations\n",
+ " Data is critical to the Fed’s decisions and Morgan Stanley’s forecasts, and both could change as new information emerges. At the March policy meeting, the Fed will have only data from January and February in hand, which likely won’t provide enough information for the central bank to be ready to announce a rate cut. The Fed is likely to hold rates steady in March unless nonfarm payrolls add fewer than 50,000 jobs in February and core prices gain less than 0.2% month-over-month. However, unexpected swings in employment and consumer prices, or a marked change in financial conditions or labor force participation, could trigger a cut earlier than we anticipate.\n",
+ "\n",
+ " There are scenarios in which the Fed could cut rates before June, including: a pronounced deterioration in credit conditions, signs of a sharp economic downturn, or slower-than-expected job growth coupled with weak inflation. Weaker inflation and payrolls could bolster the chances of a May rate cut especially.\n",
+ "\n",
+ "When trying to assess timing, statements from Fed insiders are good indicators because they tend to communicate premeditated changes in policy well in advance. If the Fed plans to hold rates steady in March, they might emphasize patience, or talk about inflation remaining elevated. If they’re considering a cut, their language will shift, and they may begin to say that a change in policy may be appropriate “in coming meetings,” “in coming months” or even “soon.” But a long heads up is not guaranteed.\n",
+ "https://www.morganstanley.com/ideas/fed-rate-cuts-2024\n",
+ "\"\"\",\n",
+ " f\"\"\"What Global Turmoil Could Mean for Investors\n",
+ "Weighing the investment impacts of global conflict and geopolitical tensions to international trade, oil prices and China equities.\n",
+ "Morgan Stanley Research expects cargo shipping to remain robust despite Red Sea disruption.\n",
+ "Crude oil shipments and oil prices should see limited negative impact from regional conflict.\n",
+ "Long-term trends could bring growth in Japan and India.\n",
+ "In a multipolar world, competition for global power is increasingly leading countries to protect their military and economic interests by erecting new barriers to cross-border commerce in key industries such as technology and renewable energy. As geopolitics and national security are to a growing degree driving how goods flow and where big capital investments are made, it’s that much more crucial for investors to know how to pick through a dizzying amount of information and focus on what’s relevant. But it’s hard to do with a seemingly endless series of alerts lighting up your phone.\n",
+ "\n",
+ "In particular, potential ripples from U.S.-China relations as well as U.S. military involvement in the Middle East could be important for investors. Morgan Stanley Research pared back the headlines and market noise to home in on three key takeaways.\n",
+ "\n",
+ "Gauging Red Sea Disruption\n",
+ "Commercial cargo ships in the Red Sea handle about 12% of global trade. Attacks on these ships by Houthi militants, and ongoing U.S. military strikes to quell the disruption, have raised concerns that supply chains could see pandemic-type disruption—and a corresponding spike in inflation.\n",
+ "\n",
+ "However, my colleagues and I expect the flow of container ships to remain robust, even if that flow is redirected to avoid the Red Sea, which serves as an outlet for vessels coming out of the Suez Canal. Although there has been a recent 200% surge in freight rates, there have not been fundamental cost increases for shipping. Additionally, there’s currently a surplus of container ships. Lengthy reroutes around the Southern tip of Africa by carriers to avoid the conflict zone may cause delays, but they should have minimal impact to inflation in Europe. The risks to the U.S. retail sector should be similarly manageable.\n",
+ "\n",
+ "Resilience in Oil and the Countries That Produce it\n",
+ "The Middle East is responsible for supplying and producing the majority of the world’s oil, so escalating conflict in the region naturally puts pressure on energy supply, as well the economic growth of relevant countries. However, the threat of weaker growth, higher inflation and erosion of support from allies offer these countries an incentive to contain the conflict. As a result, there’s unlikely to be negative impact to the debt of oil-producing countries in the region. Crude oil shipments should also see limited impacts, though oil prices could spike and European oil refiners, in particular, could face pressure if disruption in the Strait of Hormuz, which traffics about a fifth of oil supplies daily, accelerates.\n",
+ "\n",
+ "Opportunities in Asia Emerging in Japan and India\n",
+ "China has significant work to do to retool its economic engine away from property, infrastructure and debt, leading Morgan Stanley economists to predict gross-domestic product growth of 4.2% for 2024 (below the government’s 5% target), slowing to 1.7% from 2025 to 2027. As a result, China’s relatively low equity market valuation still faces challenges, including risks such as U.S. policy restricting future investment. But elsewhere in Asia—particularly in standouts Japan and India—positive long-term trends should drive markets higher. These include fiscal rebalancing, increased digitalization and increasing shifts of manufacturing and supply hubs in a multipolar world.\n",
+ "\n",
+ "For a deeper insights and analysis, ask your Morgan Stanley Representative or Financial Advisor for the full report, “Paying Attention to Global Tension.”\n",
+ "https://www.morganstanley.com/ideas/geopolitical-risk-2024\n",
+ "\"\"\",\n",
+ " f\"\"\"What 'Edge AI' Means for Smartphones\n",
+ "As generative artificial intelligence gets embedded in devices, consumers should see brand new features while smartphone manufacturers could see a sales lift.\n",
+ "Advances in artificial intelligence are pushing computing from the cloud directly onto consumer devices, such as smartphones, notebooks, wearables, automobiles and drones.\n",
+ "This trend is expected to drive smartphone sales during the next two years, reversing a slowdown that began in 2021.\n",
+ "Consumers can expect new features, such as touch-free control of their phones, desktop-quality gaming and real-time photo retouching.\n",
+ "As the adoption of generative artificial intelligence accelerates, more computing will be done in the hands of end users—literally. Increasingly, AI will be embedded in consumer devices such as smartphones, notebooks, wearables, automobiles and drones, creating new opportunities and challenges for the manufacturers of these devices.\n",
+ "\n",
+ "Generative AI’s phenomenal capabilities are power-intensive. So far, the processing needed to run sophisticated, mainstream generative AI models can only take place in the cloud. While the cloud will remain the foundation of AI infrastructure, more AI applications, functions and services require faster or more secure computing closer to the consumer. “That’s driving the need for AI algorithms that run locally on the devices rather than on a centralized cloud—or what’s known as the AI at the Edge,” says Ed Stanley, Morgan Stanley’s Head of Thematic Research in London.\n",
+ "\n",
+ "By 2025, Edge AI will be responsible for half of all enterprise data created, according to an estimate by technology market researcher Gartner Inc. While there are many hurdles to reaching commercial viability, the opportunity to tap into 30 billion devices could reduce cost, increase personalization, and improve security and privacy. In addition, faster algorithms on the Edge can reduce latency (i.e., the lag in an app’s response time as it communicates with the cloud).\n",
+ "\n",
+ "“If 2023 was the year of generative AI, 2024 could be the year the technology moves to the Edge,” says Stanley. “We think this trend will pick up steam in 2024, and along with it, opportunities for hardware makers and component suppliers that can help put AI directly into consumers' hands.”\n",
+ "\n",
+ "New Smartphones Lead the Charge\n",
+ "Smartphones currently on the market rely on traditional processors and cloud-based computing, and the only AI-enabled programs are features like face recognition, voice assist and low-light photography. Device sales have slowed in recent years, and many investors expect that smartphones will follow the trajectory of personal computers, with multi-year downturns as consumers hold onto their devices for longer due to lack of new features, sensitivity to pricing and other factors.\n",
+ "\n",
+ "But thanks in part to Edge AI, Morgan Stanley analysts think the smartphone market is poised for an upswing and predict that shipments, which have slowed since 2021, will rise by 3.9% this year and 4.4% next year.\n",
+ "\n",
+ "“Given the size of the smartphone market and consumers’ familiarity with them, it makes sense that they will lead the way in bringing AI to the Edge,” says Morgan Stanley’s U.S. Hardware analyst Erik Woodring. “This year should bring a rollout of generative AI-enabled operating systems, as well as next-generation devices and voice assistants that could spur a cycle of smartphone upgrades.”\n",
+ "\n",
+ "However, the move to the Edge will require new smartphone capabilities, especially to improve battery life, power consumption, processing speed and memory. Manufacturers with the strongest brands and balance sheets are best positioned to take the lead in the hardware arms race.\n",
+ "\n",
+ "Killer Apps\n",
+ "In addition to hardware, AI itself continues to evolve. New generations of AI models are designed be more flexible and adaptable for a wide range of uses, including Edge devices. Other beneficiaries include smartphone memory players, integrated circuit makers and camera parts suppliers that support new AI applications.\n",
+ "\n",
+ "What can you expect from your phone in the next year?\n",
+ "\n",
+ "“Always-sensing cameras” that automatically activate or lock the screen by detecting if the user is looking at it without the need to touch the screen. This feature could also automatically launch applications such as online payment and food ordering by detecting bar codes.\n",
+ "\n",
+ "Gesture controls for when the user is unable to hold their devices, such as while cooking or exercising.\n",
+ "\n",
+ "Desktop-quality gaming experiences that offer ultra-realistic graphics with cinematic detail, all with smoother interactions and blazing-fast response times.\n",
+ "\n",
+ "Professional-level photography in which image processors enhance photos and video in real time by recognizing each element in a frame—faces, hair, glasses, objects—and fine tune each, eliminating the need for retouching later.\n",
+ "\n",
+ "Smarter voice assistance that is more responsive and tuned the user’s voice and speech patterns, and can launch or suggest apps based on auditory clues.\n",
+ "\n",
+ "“With Edge AI becoming part of everyday life, we see significant opportunities ahead as new hardware provides a platform for developers to create ground-breaking generative AI apps, which could trigger a new hardware product cycle that liftsservices sales,” says Woodring.\n",
+ "\n",
+ "For deeper insights and analysis, ask your Morgan Stanley Representative or Financial Advisor for the full reports, “Tech Diffusion: Edge AI—Growing Impetus” (Nov. 7, 2023), “Edging Into a Smartphone Upcycle” (Nov. 9, 2023) and “Edge AI: Product Releases on Track, But Where Are Killer Apps?”\n",
+ "https://www.morganstanley.com/ideas/edge-ai-devices-diffusion\"\"\",\n",
+ "]\n",
+ "\n",
+ "questions = [\n",
+ " \"Can you summarize the latest changes to Morgan Stanley market managers?\",\n",
+ " \"When will the fed lower rates?\",\n",
+ " \"What are the top market risks?\",\n",
+ " \"How will AI impact the smartphone market?\",\n",
+ "]\n",
+ "\n",
+ "# Calculate the document embeddings and store in weave\n",
+ "document_embeddings = docs_to_embeddings(contexts)\n",
+ "embeddings_ref = weave.publish(document_embeddings, \"MSRAG_Embeddings\")\n",
+ "\n",
+ "for i in range(0, len(questions)):\n",
+ " # Using Google Embeddings\n",
+ " model.predict(questions[i], contexts, True)\n",
+ "\n",
+ "# Not using Google Embeddings\n",
+ "model.predict(questions[1], contexts, False)"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": null,
+ "id": "9b5f7c60-41dc-4fe2-b3fd-727ace6843be",
+ "metadata": {},
+ "outputs": [],
+ "source": []
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3 (ipykernel)",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.10.12"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 5
+}