From 52c132ec13b7206d5ee10a52ee25f9a8ced628d1 Mon Sep 17 00:00:00 2001 From: tad dy Date: Fri, 22 Nov 2024 14:47:01 -0500 Subject: [PATCH 01/69] llama_stack_client LLM provider --- agentops/llms/__init__.py | 15 ++ agentops/llms/llama_stack_client.py | 142 +++++++++++++++ .../llama_stack_client_examples/README.md | 31 ++++ .../llama_stack_client_examples.ipynb | 163 ++++++++++++++++++ .../providers/llama_stack_client_canary.py | 53 ++++++ 5 files changed, 404 insertions(+) create mode 100644 agentops/llms/llama_stack_client.py create mode 100644 examples/llama_stack_client_examples/README.md create mode 100644 examples/llama_stack_client_examples/llama_stack_client_examples.ipynb create mode 100644 tests/core_manual_tests/providers/llama_stack_client_canary.py diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py index a5852d8cd..3c29167c6 100644 --- a/agentops/llms/__init__.py +++ b/agentops/llms/__init__.py @@ -5,6 +5,8 @@ from packaging.version import Version, parse +from agentops.llms.llama_stack_client import LlamaStackClientProvider + from ..log_config import logger from .cohere import CohereProvider @@ -35,6 +37,9 @@ class LlmTracker: "5.4.0": ("chat", "chat_stream"), }, "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, + "llama_stack_client": { + "0.0.53": ("resources.InferenceResource.chat_completion"), + }, "groq": { "0.9.0": ("Client.chat", "AsyncClient.chat"), }, @@ -151,6 +156,15 @@ def override_api(self): else: logger.warning(f"Only AI21>=2.0.0 supported. v{module_version} found.") + if api == "llama_stack_client": + module_version = version(api) + + if Version(module_version) >= parse("0.0.53"): + provider = LlamaStackClientProvider(self.client) + provider.override() + else: + logger.warning(f"Only LlamaStackClient>=0.0.53 supported. 
v{module_version} found.") + def stop_instrumenting(self): OpenAiProvider(self.client).undo_override() GroqProvider(self.client).undo_override() @@ -160,3 +174,4 @@ def stop_instrumenting(self): AnthropicProvider(self.client).undo_override() MistralProvider(self.client).undo_override() AI21Provider(self.client).undo_override() + LlamaStackClientProvider(self.client).undo_override() diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py new file mode 100644 index 000000000..8218fb54b --- /dev/null +++ b/agentops/llms/llama_stack_client.py @@ -0,0 +1,142 @@ +import inspect +import pprint +import sys +from typing import Optional + +from ..event import LLMEvent, ErrorEvent +from ..session import Session +from ..log_config import logger +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from .instrumented_provider import InstrumentedProvider + + +class LlamaStackClientProvider(InstrumentedProvider): + original_complete = None + original_complete_async = None + original_stream = None + original_stream_async = None + + def __init__(self, client): + super().__init__(client) + self._provider_name = "LlamaStack" + + def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict: + """Handle responses for LlamaStack""" + from llama_stack_client import LlamaStackClient + + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion + if llm_event.returns is None: + llm_event.returns = chunk.event + + try: + accumulated_delta = llm_event.returns.delta + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.event + + if choice.delta: + llm_event.returns.delta += choice.delta + + if choice.event_type == "complete": + + llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.completion = accumulated_delta + llm_event.prompt_tokens = None + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + # if the response is a generator, decorate the generator + if kwargs["stream"] == True: + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + elif inspect.isasyncgen(response): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + try: + llm_event.returns = response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.prompt_tokens = None + llm_event.completion = response.completion_message.content + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, llm_event) + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_complete(self): + from llama_stack_client.resources import InferenceResource + + global original_complete + original_complete = InferenceResource.chat_completion + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = original_complete(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + InferenceResource.chat_completion = patched_function + + def override(self): + self._override_complete() + # self._override_complete_async() + # self._override_stream() + # self._override_stream_async() + + def undo_override(self): + if ( + self.original_complete is not None + ): + + from llama_stack_client.resources import InferenceResource + InferenceResource.chat_completion = self.original_complete diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md new file mode 100644 index 000000000..ef6d6a546 --- /dev/null +++ b/examples/llama_stack_client_examples/README.md @@ -0,0 +1,31 @@ +## How to run Llama Stack server + +export LLAMA_STACK_PORT=5001 +export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" + +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-ollama \ + --yaml-config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://host.docker.internal:11434 + +## Example Llama Stack server config + +https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml + +## Reference documentation + +- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#setting-up-ollama-server +- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#running-llama-stack + +- https://github.com/meta-llama/llama-stack-client-python +- https://github.com/meta-llama/llama-stack +- download https://ollama.com/ +- 
https://www.llama.com/docs/getting_the_models/meta/ + +## \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb new file mode 100644 index 000000000..c5d9dceb9 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U agentops" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then import them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Completion Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " stream=False\n", + ")\n", + "\n", + "print(f\"> Response: {response}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Completion with Streaming Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "agentops.end_session(\"Success\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ops", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary.py new file mode 100644 index 000000000..f61ac3473 --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary.py @@ -0,0 +1,53 @@ +import asyncio + +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(default_tags=["llama-stack-client-provider-test"]) + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + base_url=f"{full_host}", +) + +response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=False +) + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=True + ) + + async for log in EventLogger().log(response): + log.print() + + +async def main(): + await stream_test() + +agentops.end_session(end_state="Success") From ec8445db6bc92b5478cbc0807527f897bfece7ad Mon Sep 17 00:00:00 2001 From: Teo Date: Fri, 22 Nov 2024 23:16:38 -0600 Subject: [PATCH 02/69] ruff Signed-off-by: Teo --- agentops/llms/llama_stack_client.py | 17 +++++----- .../providers/llama_stack_client_canary.py | 32 ++++++++++--------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 8218fb54b..b9ec8bd7b 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -23,7 +23,7 @@ def __init__(self, client): def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict: """Handle responses for LlamaStack""" from llama_stack_client import LlamaStackClient - + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: llm_event.session_id = session.session_id @@ -47,8 +47,9 @@ def handle_stream_chunk(chunk: dict): llm_event.returns.delta += choice.delta if choice.event_type == "complete": - - llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] llm_event.agent_id = check_call_stack_for_agent_id() llm_event.completion = accumulated_delta llm_event.prompt_tokens = None @@ -88,9 +89,9 @@ async def async_generator(): try: llm_event.returns = response - llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = kwargs["model_id"] - llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] llm_event.prompt_tokens = 
None llm_event.completion = response.completion_message.content llm_event.completion_tokens = None @@ -134,9 +135,7 @@ def override(self): # self._override_stream_async() def undo_override(self): - if ( - self.original_complete is not None - ): - + if self.original_complete is not None: from llama_stack_client.resources import InferenceResource + InferenceResource.chat_completion = self.original_complete diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary.py index f61ac3473..0955f9ccc 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary.py @@ -11,8 +11,8 @@ agentops.init(default_tags=["llama-stack-client-provider-test"]) -host = "0.0.0.0" # LLAMA_STACK_HOST -port = 5001 # LLAMA_STACK_PORT +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT full_host = f"http://{host}:{port}" @@ -28,26 +28,28 @@ ), ], model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=False + stream=False, ) + async def stream_test(): - response = client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=True - ) + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=True, + ) - async for log in EventLogger().log(response): - log.print() + async for log in EventLogger().log(response): + log.print() async def main(): await stream_test() + agentops.end_session(end_state="Success") From 2fac7a09e75781e3c08472690e5fd6f83eab6ec7 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 05:52:08 -0500 Subject: [PATCH 03/69] refining and documenting the llama stack integration support & process for future reference --- .../llama_stack_client_examples/README.md | 59 ++++++-- .../llama_stack_client_examples.ipynb | 141 ++++++++++++++++-- 2 files changed, 176 insertions(+), 24 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index ef6d6a546..c24bf1ac0 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -1,31 +1,70 @@ -## How to run Llama Stack server +# TLDR +How to set up a Llama Stack server for supporting the `llama_stack_client_example.ipynb` examples + +## Disclaimer + +As of 11/2024, Llama Stack is new and is subject to breaking changes. + +Here are Llama Stack's docs: https://llama-stack.readthedocs.io/en/latest/ + +## High-level steps + +https://llama-stack.readthedocs.io/en/latest/getting_started/index.html# + +1. Download, install, & start Ollama +2. Start the Llama Stack Server +3. Call the Llama Stack Server with a Llama Stack Client + +### 1 - Download, install, & start Ollama + +https://ollama.com/ + +Ollama has an easy-to-use installer available for macOS, Linux, and Windows. + +```sh +export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" +ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m +``` + +### 2 - Start the Llama Stack server + +You need to configure the Llama Stack server with a yaml config ie: peep the `llama-stack-server-config.yaml` file. 
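The part of that config that actually wires things together is the Ollama inference provider and the model registration; both read the environment variables passed to the container in the `docker run` command below. An excerpt from `llama-stack-server-config.yaml`:

```yaml
providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
```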
FYI, found this config here: `https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml` + +```sh export LLAMA_STACK_PORT=5001 export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" - docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v ./run.yaml:/root/my-run.yaml \ + -v ./llama-stack-server-config.yaml:/root/my-run.yaml \ llamastack/distribution-ollama \ --yaml-config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env OLLAMA_URL=http://host.docker.internal:11434 +``` -## Example Llama Stack server config +### 3 - Call the Llama Stack Server with a Llama Stack Client -https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml +ie: Check out the examples in the `llama_stack_client_examples.ipynb` file -## Reference documentation +## Common Gotchas -- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#setting-up-ollama-server -- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#running-llama-stack +1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:3b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` +## Useful ollama commands + +- `ollama list` +- `ollama help` +- `ollama ps` + +## Reference links used during development + +- https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml +- https://llama-stack.readthedocs.io - https://github.com/meta-llama/llama-stack-client-python - https://github.com/meta-llama/llama-stack - download https://ollama.com/ - https://www.llama.com/docs/getting_the_models/meta/ - -## \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index c5d9dceb9..d2343e7a2 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,12 +17,76 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement 
already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", + "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", + "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", + "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", + "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting python-dotenv\n", + " Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", + "Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Installing collected packages: python-dotenv\n", + "Successfully installed python-dotenv-1.0.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade 
pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -U llama-stack-client\n", - "%pip install -U agentops" + "%pip install -U agentops\n", + "%pip install -U python-dotenv" ] }, { @@ -34,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -65,9 +129,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=31b39ff6-59b5-417d-a59d-7646c86de968\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", "\n", @@ -90,10 +162,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='\"Silver glow descends\"', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + ] + } + ], "source": [ + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", @@ -105,7 +186,8 @@ " stream=False\n", ")\n", "\n", - "print(f\"> Response: {response}\")" + "print(f\"> Response: {response}\")\n", + "agentops.end_session(\"Success\")" ] }, { @@ -117,10 +199,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[97m\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.0s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ + "agentops.start_session()\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", @@ -137,11 +243,18 @@ "\n", "agentops.end_session(\"Success\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "ops", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -155,7 +268,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.12.2" } }, "nbformat": 4, From 4299bcbd7dd06dd553296f8ca1c86965e693e896 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:02:40 -0500 Subject: [PATCH 04/69] 
fixing error in the core_manual_test for Llama Stack --- .../providers/llama_stack_client_canary.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary.py index 0955f9ccc..75bb4ac00 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary.py @@ -48,8 +48,10 @@ async def stream_test(): log.print() -async def main(): - await stream_test() +def main(): + asyncio.run(stream_test()) + +main() agentops.end_session(end_state="Success") From 5fb0b36ce49008951499d59edf3a73aac4949ea0 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:06:31 -0500 Subject: [PATCH 05/69] removing unnecessary elif branch in llama_stack_client.py llm provider --- agentops/llms/llama_stack_client.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index b9ec8bd7b..b9d25254c 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -78,15 +78,6 @@ def generator(): return generator() - elif inspect.isasyncgen(response): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - try: llm_event.returns = response llm_event.agent_id = check_call_stack_for_agent_id() From cfa18996cd55001e7dd01d17d8b659768a2d1295 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:11:11 -0500 Subject: [PATCH 06/69] updating llama stack examples & documentation --- .../llama-stack-server-config.yaml | 54 +++++++++++++++++++ .../llama_stack_client_examples.ipynb | 8 +-- 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 examples/llama_stack_client_examples/llama-stack-server-config.yaml diff --git a/examples/llama_stack_client_examples/llama-stack-server-config.yaml b/examples/llama_stack_client_examples/llama-stack-server-config.yaml new file mode 100644 index 000000000..32137fd67 --- /dev/null +++ b/examples/llama_stack_client_examples/llama-stack-server-config.yaml @@ -0,0 +1,54 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb 
b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index d2343e7a2..33d89f44e 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -162,19 +162,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='\"Silver glow descends\"', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" ] } ], "source": [ - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", + "agentops.start_session()\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", From af70799d52fd8c0589baafe6bc7e8fa3e07611fc Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:12:38 -0500 Subject: [PATCH 07/69] updating llama_stack_client_examples.ipynb --- .../llama_stack_client_examples.ipynb | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 33d89f44e..3e2b6b059 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -171,6 +171,21 @@ "text": [ "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver light descends', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 3.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" + ] } ], "source": [ @@ -199,29 +214,29 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.0s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" ] } ], From 0b56e40564e7fd8d4cf01fcccf16bd9604baafce Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 25 Nov 2024 13:21:00 -0500 Subject: [PATCH 08/69] saving tweaks to Llama Stack client examples and related README.md after live demo to Maintainers team --- .../llama_stack_client_examples/README.md | 2 +- .../llama_stack_client_examples.ipynb | 48 +++++++------------ 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index c24bf1ac0..176b8c8a3 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -38,7 +38,7 @@ docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v ./llama-stack-server-config.yaml:/root/my-run.yaml \ + -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ llamastack/distribution-ollama \ --yaml-config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 3e2b6b059..1b4bd3f7f 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -71,11 +71,7 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Collecting python-dotenv\n", - " Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", - "Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", - "Installing collected packages: python-dotenv\n", - "Successfully installed python-dotenv-1.0.1\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", 
@@ -98,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -129,19 +125,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=31b39ff6-59b5-417d-a59d-7646c86de968\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", "port = 5001 # LLAMA_STACK_PORT\n", @@ -162,29 +150,29 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver light descends', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver glowing crescent', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 3.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" ] } ], @@ -193,7 +181,7 @@ "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", - " content=\"hello world, write me a 3 word poem about the moon\",\n", + " content=\"write me a 3 word poem about the moon\",\n", " role=\"user\",\n", " ),\n", " ],\n", @@ -214,29 +202,29 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> 
\u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.2s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" ] } ], From f855873754a6b823c88c40128e061d6fbe9af7b6 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 25 Nov 2024 23:27:02 -0500 Subject: [PATCH 09/69] saving v1 of patching of Llama Stack Agent.create_turn method --- agentops/llms/llama_stack_client.py | 219 ++++++++++++------ .../llama_stack_client_examples.ipynb | 38 +-- .../llama_stack_client_canary/agent_canary.py | 97 ++++++++ .../inference_canary.py} | 35 +-- 4 files changed, 284 insertions(+), 105 deletions(-) create mode 100644 tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py rename tests/core_manual_tests/providers/{llama_stack_client_canary.py => llama_stack_client_canary/inference_canary.py} (61%) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index b9d25254c..d0554c170 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -1,7 +1,7 @@ import inspect import pprint import sys -from typing import Optional +from typing import Dict, Optional from ..event import LLMEvent, ErrorEvent from ..session import Session @@ -9,86 +9,135 @@ from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id from .instrumented_provider import InstrumentedProvider - class LlamaStackClientProvider(InstrumentedProvider): original_complete = None - original_complete_async = None - original_stream = None - original_stream_async = None + original_create_turn = None + def __init__(self, client): super().__init__(client) self._provider_name = "LlamaStack" - def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict: + def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: """Handle responses for LlamaStack""" - from llama_stack_client import LlamaStackClient - - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - llm_event.session_id = session.session_id - - def handle_stream_chunk(chunk: dict): - # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - if llm_event.returns is None: - llm_event.returns = chunk.event - - try: - accumulated_delta = llm_event.returns.delta - llm_event.agent_id = 
check_call_stack_for_agent_id() - llm_event.model = kwargs["model_id"] - llm_event.prompt = kwargs["messages"] - - # NOTE: We assume for completion only choices[0] is relevant - choice = chunk.event - - if choice.delta: - llm_event.returns.delta += choice.delta - - if choice.event_type == "complete": - llm_event.prompt = [ - {"content": message.content, "role": message.role} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.completion = accumulated_delta - llm_event.prompt_tokens = None - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) - - except Exception as e: - self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) - - kwargs_str = pprint.pformat(kwargs) - chunk = pprint.pformat(chunk) - logger.warning( - f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" - f"chunk:\n {chunk}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - # if the response is a generator, decorate the generator - if kwargs["stream"] == True: - - def generator(): - for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return generator() - try: - llm_event.returns = response - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = kwargs["model_id"] - llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] - llm_event.prompt_tokens = None - llm_event.completion = response.completion_message.content - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - - self._safe_record(session, llm_event) + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion + if llm_event.returns is None: + llm_event.returns = chunk.event + + try: + accumulated_delta = llm_event.returns.delta + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.event + + if choice.delta: + llm_event.returns.delta += choice.delta + + if choice.event_type == "complete": + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.completion = accumulated_delta + llm_event.prompt_tokens = None + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + def handle_stream_agent(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion + + if llm_event.returns is None: + llm_event.returns = chunk.event + + try: + if chunk.event.payload.event_type == "step_start": + pass + elif chunk.event.payload.event_type == "turn_start": + pass + elif chunk.event.payload.event_type == "step_progress": + + if (chunk.event.payload.step_type == "inference"): + delta = chunk.event.payload.text_delta_model_response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = "Llama Stack" + llm_event.prompt = kwargs["messages"] + + if llm_event.completion: + llm_event.completion += delta + else: + llm_event.completion = delta + + elif chunk.event.payload.event_type == "step_complete": + pass + elif chunk.event.payload.event_type == "turn_complete": + llm_event.prompt = [ + {"content": message['content'], "role": message['role']} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + if kwargs.get("stream", False): + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + return generator() + elif inspect.isasyncgen(response): + async def async_generator(): + async for chunk in response: + handle_stream_agent(chunk) + yield chunk + + return async_generator() + else: + llm_event.returns = response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata["model_id"] + llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] + llm_event.prompt_tokens = None + llm_event.completion = response.completion_message.content + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, llm_event) except Exception as e: self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) kwargs_str = pprint.pformat(kwargs) @@ -119,14 +168,36 @@ def patched_function(*args, **kwargs): # Override the original method with the patched one InferenceResource.chat_completion = patched_function + def _override_create_turn(self): + from llama_stack_client.lib.agents.agent import Agent + + global original_create_turn + original_create_turn = Agent.create_turn + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = original_create_turn(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) + + # Override the original method with the patched one + Agent.create_turn = 
patched_function + + def override(self): self._override_complete() - # self._override_complete_async() + self._override_create_turn() # self._override_stream() # self._override_stream_async() def undo_override(self): if self.original_complete is not None: from llama_stack_client.resources import InferenceResource - InferenceResource.chat_completion = self.original_complete + + if self.original_create_turn is not None: + from llama_stack_client.lib.agents.agent import Agent + Agent.create_turn = self.original_create_turn diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 1b4bd3f7f..b4e2d96fa 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -125,9 +125,17 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: AgentOps has already been initialized. If you are trying to start a session, call agentops.start_session() instead.\n" + ] + } + ], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", @@ -150,29 +158,29 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver glowing crescent', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Lunar gentle glow', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 10.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" ] } ], @@ -202,29 +210,29 @@ }, { "cell_type": "code", - "execution_count": 5, + 
"execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.2s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.4s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" ] } ], diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py new file mode 100644 index 000000000..cd4d169fa --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -0,0 +1,97 @@ +import asyncio +import os + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types import Attachment +from llama_stack_client.types.agent_create_params import AgentConfig + +import os +import fire +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import AgentConfig + +import agentops + +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + + +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" + +async def agent_test(): + client = LlamaStackClient( + base_url=f"http://localhost:{LLAMA_STACK_PORT}", + ) + + available_shields = [shield.identifier for shield in client.shields.list()] + if not available_shields: + print("No available shields. Disable safety.") + else: + print(f"Available shields found: {available_shields}") + available_models = [model.identifier for model in client.models.list()] + if not available_models: + raise ValueError("No available models") + else: + selected_model = available_models[0] + print(f"Using model: {selected_model}") + + agent_config = AgentConfig( + model=selected_model, + instructions="You are a helpful assistant. 
Just say hello as a greeting.", + sampling_params={ + "strategy": "greedy", + "temperature": 1.0, + "top_p": 0.9, + }, + tools=[ + { + "type": "brave_search", + "engine": "brave", + "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), + } + ], + tool_choice="auto", + tool_prompt_format="json", + input_shields=available_shields if available_shields else [], + output_shields=available_shields if available_shields else [], + enable_session_persistence=False, + ) + agent = Agent(client, agent_config) + user_prompts = [ + "Hello", + "Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools", + ] + + session_id = agent.create_session("test-session") + + for prompt in user_prompts: + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + session_id=session_id, + ) + + print("Response: ", response) + + async for log in EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + asyncio.run(agent_test()) + agentops.end_session("Success") + +main() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py similarity index 61% rename from tests/core_manual_tests/providers/llama_stack_client_canary.py rename to tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py index 75bb4ac00..dc4382f81 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py @@ -9,7 +9,11 @@ load_dotenv() -agentops.init(default_tags=["llama-stack-client-provider-test"]) +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT @@ -20,18 +24,6 @@ base_url=f"{full_host}", ) -response = client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=False, -) - - async def stream_test(): response = client.inference.chat_completion( messages=[ @@ -49,9 +41,20 @@ async def stream_test(): def main(): - asyncio.run(stream_test()) + agentops.start_session() -main() + client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=False, + ) + asyncio.run(stream_test()) + agentops.end_session(end_state="Success") -agentops.end_session(end_state="Success") +main() From 6bf54e5790ad774a26c960b282f7913fbfc2de64 Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 01:57:02 -0500 Subject: [PATCH 10/69] save progress to testing Llama Stack Agent class and Inference class --- agentops/llms/llama_stack_client.py | 79 ++++++++++++++++++----------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index d0554c170..8dcddd60e 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -3,7 +3,7 @@ import sys from typing import Dict, Optional -from ..event import LLMEvent, ErrorEvent +from ..event import LLMEvent, ErrorEvent, ToolEvent from ..session import Session from ..log_config import logger from 
agentops.helpers import get_ISO_time, check_call_stack_for_agent_id @@ -21,35 +21,38 @@ def __init__(self, client): def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: """Handle responses for LlamaStack""" try: - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - llm_event.session_id = session.session_id + accum_delta = None def handle_stream_chunk(chunk: dict): + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion if llm_event.returns is None: llm_event.returns = chunk.event try: - accumulated_delta = llm_event.returns.delta + nonlocal accum_delta llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = kwargs["model_id"] llm_event.prompt = kwargs["messages"] # NOTE: We assume for completion only choices[0] is relevant - choice = chunk.event - - if choice.delta: - llm_event.returns.delta += choice.delta + # chunk.event - if choice.event_type == "complete": + if chunk.event.event_type == "start": + accum_delta = chunk.event.delta + elif chunk.event.event_type == "progress": + accum_delta += chunk.event.delta + elif chunk.event.event_type == "complete": llm_event.prompt = [ {"content": message.content, "role": message.role} for message in kwargs["messages"] ] llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.completion = accumulated_delta llm_event.prompt_tokens = None + llm_event.completion = accum_delta llm_event.completion_tokens = None llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) @@ -68,7 +71,11 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + + if session is not None: + llm_event.session_id = session.session_id + if llm_event.returns is None: llm_event.returns = chunk.event @@ -79,29 +86,39 @@ def handle_stream_agent(chunk: dict): pass elif chunk.event.payload.event_type == "step_progress": - if (chunk.event.payload.step_type == "inference"): + if (chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): + nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = "Llama Stack" llm_event.prompt = kwargs["messages"] - if llm_event.completion: - llm_event.completion += delta + if accum_delta: + accum_delta += delta else: - llm_event.completion = delta - + accum_delta = delta + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "started"): + pass + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + pass + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == 
"success"): + pass + elif chunk.event.payload.event_type == "step_complete": - pass + print("Step complete") + if (chunk.event.payload.step_type == "inference"): + llm_event.prompt = [ + {"content": message['content'], "role": message['role']} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion = accum_delta + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) elif chunk.event.payload.event_type == "turn_complete": - llm_event.prompt = [ - {"content": message['content'], "role": message['role']} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata.get("model_id", "Unable to identify model") - llm_event.prompt_tokens = None - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) + pass except Exception as e: self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) @@ -128,9 +145,13 @@ async def async_generator(): return async_generator() else: + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + llm_event.returns = response llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata["model_id"] + llm_event.model = kwargs["model_id"] llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] llm_event.prompt_tokens = None llm_event.completion = response.completion_message.content @@ -190,8 +211,6 @@ def patched_function(*args, **kwargs): def override(self): self._override_complete() self._override_create_turn() - # self._override_stream() - # self._override_stream_async() def undo_override(self): if self.original_complete is not None: From 3dc0d2fcb6b22a07a53b578e6bb92f33b42bad34 Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 02:01:17 -0500 Subject: [PATCH 11/69] minor edits --- agentops/llms/llama_stack_client.py | 1 - .../providers/llama_stack_client_canary/agent_canary.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 8dcddd60e..2a1485698 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -105,7 +105,6 @@ def handle_stream_agent(chunk: dict): pass elif chunk.event.payload.event_type == "step_complete": - print("Step complete") if (chunk.event.payload.step_type == "inference"): llm_event.prompt = [ {"content": message['content'], "role": message['role']} for message in kwargs["messages"] diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index cd4d169fa..9921c4d69 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -16,9 +16,9 @@ import agentops -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) From b815ef3740debb9d661a0d1f2e2cc7b8069801c4 Mon Sep 17 00:00:00 
2001 From: tad dy Date: Tue, 26 Nov 2024 11:58:39 -0500 Subject: [PATCH 12/69] removing unneeded code --- agentops/llms/llama_stack_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 2a1485698..a5d06f90d 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -90,7 +90,6 @@ def handle_stream_agent(chunk: dict): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = "Llama Stack" llm_event.prompt = kwargs["messages"] if accum_delta: From 888b6351b2ff5f5356846818aaed55d0e3328ccd Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 12:02:08 -0500 Subject: [PATCH 13/69] format line --- agentops/llms/llama_stack_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index a5d06f90d..5c060da60 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -140,7 +140,6 @@ async def async_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk - return async_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) From 1c7c1deecf673db4d16a9487a8048e06b65171fd Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 14:00:03 -0500 Subject: [PATCH 14/69] adding support for monitoring tools --- agentops/llms/llama_stack_client.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 5c060da60..8fbd2b29d 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -22,6 +22,7 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se """Handle responses for LlamaStack""" try: accum_delta = None + accum_tool_delta = None def handle_stream_chunk(chunk: dict): llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) @@ -72,6 +73,7 @@ def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + tool_event = ToolEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: llm_event.session_id = session.session_id @@ -80,9 +82,9 @@ def handle_stream_agent(chunk: dict): llm_event.returns = chunk.event try: - if chunk.event.payload.event_type == "step_start": + if chunk.event.payload.event_type == "turn_start": pass - elif chunk.event.payload.event_type == "turn_start": + elif chunk.event.payload.event_type == "step_start": pass elif chunk.event.payload.event_type == "step_progress": @@ -97,11 +99,21 @@ def handle_stream_agent(chunk: dict): else: accum_delta = delta elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "started"): - pass + tool_event.name = "ToolExecution - started" + self._safe_record(session, tool_event) elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - pass + nonlocal accum_tool_delta + delta = chunk.event.payload.tool_call_delta.content + 
if accum_tool_delta: + accum_tool_delta += delta + else: + accum_tool_delta = delta elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "success"): - pass + tool_event.name = "ToolExecution - success" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "failure"): + self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": if (chunk.event.payload.step_type == "inference"): @@ -111,10 +123,14 @@ def handle_stream_agent(chunk: dict): llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = metadata.get("model_id", "Unable to identify model") llm_event.prompt_tokens = None - llm_event.completion = accum_delta + llm_event.completion = accum_delta or kwargs["completion"] llm_event.completion_tokens = None llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) + elif (chunk.event.payload.step_type == "tool_execution"): + tool_event.name = "ToolExecution - complete" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": pass From 187963b05c6fe86a97b42bf0e5b565c2bb474dbc Mon Sep 17 00:00:00 2001 From: tad dy Date: Wed, 27 Nov 2024 12:40:39 -0500 Subject: [PATCH 15/69] for completeness --- agentops/llms/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py index 3c29167c6..b26cd1233 100644 --- a/agentops/llms/__init__.py +++ b/agentops/llms/__init__.py @@ -38,7 +38,7 @@ class LlmTracker: }, "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, "llama_stack_client": { - "0.0.53": ("resources.InferenceResource.chat_completion"), + "0.0.53": ("resources.InferenceResource.chat_completion", "lib.agents.agent.Agent.create_turn"), }, "groq": { "0.9.0": ("Client.chat", "AsyncClient.chat"), From c1a58f289cb2d8f47a540d01805280b8bc157e87 Mon Sep 17 00:00:00 2001 From: reibs Date: Fri, 29 Nov 2024 12:44:52 -0500 Subject: [PATCH 16/69] remove logs --- .../llama_stack_client_examples.ipynb | 63 +------------------ 1 file changed, 2 insertions(+), 61 deletions(-) diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index b4e2d96fa..098fa2f68 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,68 +17,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", - "Requirement 
already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", - "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", - "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", - "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", - "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", - "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: python-dotenv in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U agentops\n", From ac3e01e1262921e9aa30037bae8380a2cad2e571 Mon Sep 17 00:00:00 2001 From: tad dy Date: Fri, 29 Nov 2024 23:35:50 -0500 Subject: [PATCH 17/69] implemeting code review --- agentops/llms/llama_stack_client.py | 8 +- .../llama_stack_client_examples/README.md | 73 ++++++++++++++++--- .../docker-compose.yaml | 57 +++++++++++++++ .../llama-stack-server-config.yaml | 2 +- .../llama_stack_client_examples.ipynb | 68 +++++++++++------ .../llama_stack_client_canary/agent_canary.py | 2 +- 6 files changed, 169 insertions(+), 41 deletions(-) create mode 100644 examples/llama_stack_client_examples/docker-compose.yaml diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 8fbd2b29d..7ed9f14f6 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -3,11 +3,11 @@ import sys from typing import Dict, Optional -from ..event import LLMEvent, ErrorEvent, ToolEvent -from ..session import Session -from ..log_config import logger +from agentops.event import LLMEvent, ErrorEvent, ToolEvent +from agentops.session import Session +from agentops.log_config import logger from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id -from .instrumented_provider import InstrumentedProvider +from agentops.llms.instrumented_provider import InstrumentedProvider class LlamaStackClientProvider(InstrumentedProvider): original_complete = None diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 176b8c8a3..f88666a1a 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -5,35 +5,37 @@ How to set up a Llama Stack server for supporting the `llama_stack_client_exampl ## Disclaimer As of 11/2024, Llama Stack is new and is subject to breaking changes. - Here are Llama Stack's docs: https://llama-stack.readthedocs.io/en/latest/ -## High-level steps +## ToC -https://llama-stack.readthedocs.io/en/latest/getting_started/index.html# +1. Running the Ollama Server and Llama Stack Server on the Host + - a) Download, install, & start Ollama + - b) Start the Llama Stack Server + - c) Call the Llama Stack Server with a Llama Stack Client +2. Running the Ollama Server in a Docker Container -1. Download, install, & start Ollama -2. Start the Llama Stack Server -3. Call the Llama Stack Server with a Llama Stack Client +## Running the Ollama Server and Llama Stack Server on the Host -### 1 - Download, install, & start Ollama +### 1a - Download, install, & start Ollama https://ollama.com/ Ollama has an easy-to-use installer available for macOS, Linux, and Windows. 
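+Once Ollama is installed and serving, a quick sanity check (assuming it is listening on its default port, 11434) is to hit the local API; `/api/tags` just lists whatever models have been pulled so far:
+
+```sh
+curl http://localhost:11434/api/tags
+```
+
+Then pull and run the inference model:
+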
```sh -export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" +export OLLAMA_INFERENCE_MODEL="llama3.2:1b-instruct-fp16" ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m +ollama run llama3.2:1b --keepalive 60m ``` -### 2 - Start the Llama Stack server +### 1b - Start the Llama Stack server You need to configure the Llama Stack server with a yaml config ie: peep the `llama-stack-server-config.yaml` file. FYI, found this config here: `https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml` ```sh export LLAMA_STACK_PORT=5001 -export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" +export INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ @@ -46,10 +48,58 @@ docker run \ --env OLLAMA_URL=http://host.docker.internal:11434 ``` -### 3 - Call the Llama Stack Server with a Llama Stack Client +```sh +docker run \ + -it \ + -p 5001:5001 \ + -v ~/.llama:/root/.llama \ + -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ + llamastack/distribution-ollama \ + --yaml-config /root/my-run.yaml \ + --port 5001 \ + --env INFERENCE_MODEL=meta-llama/Llama-3.2-1B \ + --env OLLAMA_URL=http://host.docker.internal:11434 +``` + + +### 1c - Call the Llama Stack Server with a Llama Stack Client ie: Check out the examples in the `llama_stack_client_examples.ipynb` file +## Running the Ollama Server in a Docker Container + +```sh - set up the ollama server +docker-compose -f docker-compose.yaml up +``` + +```sh - download a model +curl -X POST http://localhost:11434/api/pull -d '{"model": "llama3.2:1b"}' +``` + +```sh - test the model +curl http://localhost:11434/api/generate -d '{ + "model": "llama3.2:1b", + "prompt": "Why is the sky blue?" +}' + +curl http://localhost:11434/api/chat -d '{ + "model": "llama3.2:1b", + "messages": [ + { + "role": "user", + "content": "why is the sky blue?" + } + ], + "stream": false +}' +``` + +## 2 - Running the Ollama Server in a Docker Container + +```sh +docker-compose -f docker-compose.yaml up +``` + ## Common Gotchas 1. Models contain different id's w.r.t. Ollama and Llama Stack. 
For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:3b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` @@ -68,3 +118,4 @@ ie: Check out the examples in the `llama_stack_client_examples.ipynb` file - https://github.com/meta-llama/llama-stack - download https://ollama.com/ - https://www.llama.com/docs/getting_the_models/meta/ +- https://llama-stack.readthedocs.io/en/latest/getting_started/index.html diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml new file mode 100644 index 000000000..a4ed6e416 --- /dev/null +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -0,0 +1,57 @@ +version: '3.8' + +services: + # Ollama server service + ollama: + image: ollama/ollama:latest + container_name: ollama_server + ports: + - "11434:11434" # Map Ollama's port to host + volumes: + - ~/.ollama/models:/root/.ollama # Persist data (e.g., downloaded models) + entrypoint: ["ollama", "serve"] # Start the Ollama server + restart: always # Ensure Ollama server restarts on failure + + # Ephemeral service to trigger model download + model_downloader: + image: curlimages/curl:latest # Use a lightweight image with curl + depends_on: + - ollama # Ensure the Ollama server starts first + entrypoint: > + sh -c "sleep 5 && + curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:1b-instruct-fp16\"}'" + restart: "no" # Ensure this service doesn't restart + + + tester: + image: curlimages/curl:latest # Use a lightweight image with curl + depends_on: + - model_downloader # Ensure the Ollama server starts first + entrypoint: > + sh -c "sleep 5 && + curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:1b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" + restart: "no" # Ensure this service doesn't restart + + llama-stack: + image: llamastack/distribution-ollama + container_name: llama_stack_server + ports: + - "5001:5001" + volumes: + - "~/.ollama/models:/root/.ollama" + - "./llama-stack-server-config.yaml:/root/my-run.yaml" + environment: + - INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct + - OLLAMA_URL=http://ollama:11434 + command: > + --yaml-config /root/my-run.yaml + --port 5001 + platform: linux/amd64 + depends_on: + - ollama + - model_downloader + - tester + +networks: + default: + driver: bridge diff --git a/examples/llama_stack_client_examples/llama-stack-server-config.yaml b/examples/llama_stack_client_examples/llama-stack-server-config.yaml index 32137fd67..c51a454eb 100644 --- a/examples/llama_stack_client_examples/llama-stack-server-config.yaml +++ b/examples/llama_stack_client_examples/llama-stack-server-config.yaml @@ -13,7 +13,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL} memory: - provider_id: faiss provider_type: inline::faiss diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 098fa2f68..bfb858863 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ 
-66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -99,29 +99,35 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Lunar gentle glow', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f3dde247-e5bd-4d25-ab2a-08612270cb08\u001b[0m\u001b[0m\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 10.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" + "ename": "InternalServerError", + "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:199\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 199\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1261\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1249\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1256\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1257\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1258\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1259\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1260\u001b[0m )\n\u001b[0;32m-> 1261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:953\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 951\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 953\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 954\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 955\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1056\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1053\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1055\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status 
error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1056\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1058\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1059\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1060\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1064\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1065\u001b[0m )\n", + "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" ] } ], @@ -134,7 +140,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=False\n", ")\n", "\n", @@ -151,29 +157,43 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5d7c69df-a2e7-4405-810b-8c9283c30a10\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.4s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Could not end session - multiple sessions detected. 
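The "Multiple sessions detected" warnings in the streaming run above show up when more than one AgentOps session is open at once, so the module-level helpers cannot tell which session an event or end-of-session call belongs to. A minimal sketch of the pattern the warning asks for, assuming `agentops.start_session()` returns a `Session` handle as it does in the other examples in this patch:

```python
import agentops

# Keep a handle on the session instead of relying on the module-level helpers.
session = agentops.start_session()
try:
    # ... run the Llama Stack chat_completion / agent calls here ...
    pass
finally:
    # End this specific session; with several sessions open,
    # agentops.end_session("Success") cannot pick one on its own.
    session.end_session(end_state="Success")
```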
You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" ] } ], @@ -186,7 +206,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=True\n", ")\n", "\n", diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 9921c4d69..5f54abeb0 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -24,7 +24,7 @@ LLAMA_STACK_PORT = 5001 -INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" +INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" async def agent_test(): client = LlamaStackClient( From 8122c3f19b8592fe77bdb1ea0b483b87057ba658 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 03:48:11 -0500 Subject: [PATCH 18/69] saving progress of getting agent monkeypatch tested in the ipynb --- agentops/llms/llama_stack_client.py | 34 +-- .../llama_stack_client_examples.ipynb | 256 +++++++++++++++--- .../inference_canary.py | 22 +- 3 files changed, 253 insertions(+), 59 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 7ed9f14f6..b9ed79ad3 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -98,22 +98,24 @@ def handle_stream_agent(chunk: dict): accum_delta += delta else: accum_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "started"): - tool_event.name = "ToolExecution - started" - self._safe_record(session, tool_event) - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - nonlocal accum_tool_delta - delta = chunk.event.payload.tool_call_delta.content - if accum_tool_delta: - accum_tool_delta += delta - else: - accum_tool_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "success"): - tool_event.name = "ToolExecution - success" - tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "failure"): - self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): + + if (chunk.event.payload.tool_call_delta.parse_status == "started"): + tool_event.name = "ToolExecution - started" + self._safe_record(session, tool_event) + elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + nonlocal accum_tool_delta + delta = chunk.event.payload.tool_call_delta.content + if accum_tool_delta: + accum_tool_delta += delta + else: + accum_tool_delta = delta + elif (chunk.event.payload.tool_call_delta.parse_status == "success"): + tool_event.name = "ToolExecution - success" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif 
(chunk.event.payload.tool_call_delta.parse_status == "failure"): + self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": if (chunk.event.payload.step_type == "inference"): diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index bfb858863..d79971260 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,9 +17,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in 
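The branches above follow the tool-call delta lifecycle reported by the agent stream (`started`, then one or more `in_progress` chunks, then `success` or `failure`). A standalone sketch of the same accumulation logic, using a hypothetical list of `(parse_status, content)` pairs in place of real stream chunks:

```python
# Hypothetical stand-ins for tool_call_delta chunks from a streamed agent turn.
chunks = [
    ("started", ""),
    ("in_progress", '{"query": "NBA western conference '),
    ("in_progress", 'semifinals 2024"}'),
    ("success", ""),
]

accum_tool_delta = None
for parse_status, content in chunks:
    if parse_status == "started":
        accum_tool_delta = None  # a new tool call begins; reset the buffer
    elif parse_status == "in_progress":
        accum_tool_delta = (accum_tool_delta or "") + content  # build up the partial arguments
    elif parse_status == "success":
        print("tool call arguments:", accum_tool_delta)  # record the completed tool event here
    elif parse_status == "failure":
        print("tool call failed after:", accum_tool_delta)  # record an error event here
```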
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", + "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", + "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", + "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", + "Requirement already satisfied: 
PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U agentops\n", @@ -35,16 +94,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.inference.event_logger import EventLogger\n", "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", "from dotenv import load_dotenv\n", "import os\n", - "import agentops" + "import agentops\n", + "import asyncio" ] }, { @@ -54,9 +116,14 @@ "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -66,17 +133,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: AgentOps has already been initialized. 
If you are trying to start a session, call agentops.start_session() instead.\n" - ] - } - ], + "outputs": [], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", @@ -99,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -132,20 +191,20 @@ } ], "source": [ - "agentops.start_session()\n", - "response = client.inference.chat_completion(\n", - " messages=[\n", - " UserMessage(\n", - " content=\"write me a 3 word poem about the moon\",\n", - " role=\"user\",\n", - " ),\n", - " ],\n", - " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - " stream=False\n", - ")\n", + "# agentops.start_session()\n", + "# response = client.inference.chat_completion(\n", + "# messages=[\n", + "# UserMessage(\n", + "# content=\"write me a 3 word poem about the moon\",\n", + "# role=\"user\",\n", + "# ),\n", + "# ],\n", + "# model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + "# stream=False\n", + "# )\n", "\n", - "print(f\"> Response: {response}\")\n", - "agentops.end_session(\"Success\")" + "# print(f\"> Response: {response}\")\n", + "# agentops.end_session(\"Success\")" ] }, { @@ -157,21 +216,23 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5d7c69df-a2e7-4405-810b-8c9283c30a10\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=117bbe62-4f2d-4d33-bec9-ac9374ac6092\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m" + "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", + "\n", + "\u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m" ] }, { @@ -216,6 +277,137 @@ "agentops.end_session(\"Success\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=984ed603-12a9-4c76-95b1-36c327a0b6d4\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No available shields. Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", + "🖇 AgentOps: Unable to parse response for LLM call. 
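The "Unable to parse response for LLM call" warnings around the agent example come from `create_turn` returning a streaming generator rather than a finished response object; as a plain generator it typically does no work until it is consumed. A minimal consumption sketch, assuming an `Agent` and agent session created the same way as in the cell below:

```python
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger


async def run_turn(agent: Agent, session_id: str, prompt: str) -> None:
    response = agent.create_turn(
        messages=[{"role": "user", "content": prompt}],
        session_id=session_id,
    )
    # Iterating the result is what actually drives the turn and streams its events.
    async for log in EventLogger().log(response):
        log.print()
```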
Skipping upload to AgentOps\n", + "response:\n", + " \n", + "kwargs:\n", + " {'messages': [{'content': 'Hello', 'role': 'user'}],\n", + " 'session_id': '08402a4a-7991-4831-b53c-893a809898af'}\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response: \n" + ] + } + ], + "source": [ + "import nest_asyncio\n", + "import asyncio\n", + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "# Apply nest_asyncio to handle nested event loops\n", + "# nest_asyncio.apply()\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://localhost:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": os.getenv(\"BRAVE_SEARCH_API_KEY\"),\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(\"Response: \", response)\n", + "\n", + " async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "\n", + "def main():\n", + " agentops.start_session()\n", + " asyncio.run(agent_test())\n", + " agentops.end_session(\"Success\")\n", + "\n", + "main()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py index dc4382f81..38dec66cc 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py +++ 
b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py @@ -32,7 +32,7 @@ async def stream_test(): role="user", ), ], - model_id="meta-llama/Llama-3.2-3B-Instruct", + model_id="meta-llama/Llama-3.2-1B-Instruct", stream=True, ) @@ -43,16 +43,16 @@ async def stream_test(): def main(): agentops.start_session() - client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=False, - ) + # client.inference.chat_completion( + # messages=[ + # UserMessage( + # content="hello world, write me a 3 word poem about the moon", + # role="user", + # ), + # ], + # model_id="meta-llama/Llama-3.2-1B-Instruct", + # stream=False, + # ) asyncio.run(stream_test()) agentops.end_session(end_state="Success") From b131246bd55feb247d6d495a4e5803f22fb792f8 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 07:57:26 -0500 Subject: [PATCH 19/69] saving testing scaffold and preliminary fireworks setup/support --- .../README.fireworks.md | 9 +++ .../fireworks-compose.yaml | 16 +++++ .../fireworks-server-config.yaml | 59 +++++++++++++++++++ .../llama_stack_client_examples.ipynb | 51 +++++++--------- tests/llama_stack/test_llama_stack.py | 57 ++++++++++++++++++ 5 files changed, 161 insertions(+), 31 deletions(-) create mode 100644 examples/llama_stack_client_examples/README.fireworks.md create mode 100644 examples/llama_stack_client_examples/fireworks-compose.yaml create mode 100644 examples/llama_stack_client_examples/fireworks-server-config.yaml create mode 100644 tests/llama_stack/test_llama_stack.py diff --git a/examples/llama_stack_client_examples/README.fireworks.md b/examples/llama_stack_client_examples/README.fireworks.md new file mode 100644 index 000000000..412821953 --- /dev/null +++ b/examples/llama_stack_client_examples/README.fireworks.md @@ -0,0 +1,9 @@ +## + +https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml + +## + +```sh +docker-compose -f fireworks-server-config.yaml up +``` diff --git a/examples/llama_stack_client_examples/fireworks-compose.yaml b/examples/llama_stack_client_examples/fireworks-compose.yaml new file mode 100644 index 000000000..fcac78a29 --- /dev/null +++ b/examples/llama_stack_client_examples/fireworks-compose.yaml @@ -0,0 +1,16 @@ +services: + llamastack: + image: llamastack/distribution-fireworks + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + - ./run.yaml:/root/llamastack-run-fireworks.yaml + ports: + - "5000:5000" + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-fireworks.yaml" + deploy: + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s \ No newline at end of file diff --git a/examples/llama_stack_client_examples/fireworks-server-config.yaml b/examples/llama_stack_client_examples/fireworks-server-config.yaml new file mode 100644 index 000000000..2f8f8429e --- /dev/null +++ b/examples/llama_stack_client_examples/fireworks-server-config.yaml @@ -0,0 +1,59 @@ +version: '2' +image_name: fireworks +docker_image: null +conda_env: fireworks +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: "https://api.fireworks.ai/inference" + api_key: "fw_3ZVeWz59L6eAVPG1GRnCm7wW" + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: 
sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db +models: +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: null + provider_model_id: fireworks/llama-v3p2-1b-instruct +shields: +- params: null + shield_id: meta-llama/Llama-Guard-3-8B + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index d79971260..ccf768932 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -279,24 +279,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=984ed603-12a9-4c76-95b1-36c327a0b6d4\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -305,10 +290,22 @@ "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", "response:\n", - " \n", + " \n", "kwargs:\n", " {'messages': [{'content': 'Hello', 'role': 'user'}],\n", - " 'session_id': '08402a4a-7991-4831-b53c-893a809898af'}\n", + " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", + "\n", + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", + "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", + "response:\n", + " \n", + "kwargs:\n", + " {'messages': [{'content': 'Which players played in the winning team of the NBA '\n", + " 'western conference semifinals of 2024, please use '\n", + " 'tools',\n", + " 'role': 'user'}],\n", + " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", "\n" ] }, @@ -316,13 +313,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Response: \n" + "No available shields. 
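Once the Fireworks-backed distribution configured above is running (the accompanying compose file exposes port 5000), a quick sanity check is to list the registered models through the client. The base URL and the expected `meta-llama/Llama-3.2-1B-Instruct` entry are assumptions taken from this config and may differ in other setups:

```python
from llama_stack_client import LlamaStackClient

# Port 5000 matches the fireworks compose file; adjust if the port mapping changes.
client = LlamaStackClient(base_url="http://localhost:5000")

# The run config registers meta-llama/Llama-3.2-1B-Instruct against the
# fireworks provider, so it should show up in this listing.
for model in client.models.list():
    print(model.identifier)
```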
Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n", + "Response: \n", + "Response: \n" ] } ], "source": [ - "import nest_asyncio\n", - "import asyncio\n", "import os\n", "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.agents.agent import Agent\n", @@ -396,16 +394,7 @@ "\n", " print(\"Response: \", response)\n", "\n", - " async for log in EventLogger().log(response):\n", - " log.print()\n", - "\n", - "\n", - "def main():\n", - " agentops.start_session()\n", - " asyncio.run(agent_test())\n", - " agentops.end_session(\"Success\")\n", - "\n", - "main()" + "await agent_test()" ] }, { diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py new file mode 100644 index 000000000..b93a18567 --- /dev/null +++ b/tests/llama_stack/test_llama_stack.py @@ -0,0 +1,57 @@ +import pytest +import requests_mock +import time + +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + + +@pytest.fixture(autouse=True) +def setup_teardown(): + yield + + +@pytest.fixture(autouse=True, scope="function") +def mock_req(): + with requests_mock.Mocker() as m: + url = "http://localhost:5001" + m.post(url + "/v2/create_events", json={"status": "ok"}) + m.post(url + "/v2/create_session", json={"status": "success", "jwt": "some_jwt"}) + + yield m + + +class TestLlamaStack: + def setup_method(self): + + print("...Setting up LlamaStackClient...") + + host = "0.0.0.0" # LLAMA_STACK_HOST + port = 5001 # LLAMA_STACK_PORT + + full_host = f"http://{host}:{port}" + + self.client = LlamaStackClient( + base_url=f"{full_host}", + ) + + + def test_llama_stack_inference(self, mock_req): + + response = self.client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, + ) + + # async for log in EventLogger().log(response): + # log.print() + + print(response) + From ae572ba6d15481ddab6ab6653457ef5a368cde8d Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 08:14:39 -0500 Subject: [PATCH 20/69] remove Fireworks API key --- .../llama_stack_client_examples/fireworks-server-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/fireworks-server-config.yaml b/examples/llama_stack_client_examples/fireworks-server-config.yaml index 2f8f8429e..cb9dd2cbc 100644 --- a/examples/llama_stack_client_examples/fireworks-server-config.yaml +++ b/examples/llama_stack_client_examples/fireworks-server-config.yaml @@ -14,7 +14,7 @@ providers: provider_type: remote::fireworks config: url: "https://api.fireworks.ai/inference" - api_key: "fw_3ZVeWz59L6eAVPG1GRnCm7wW" + api_key: "" memory: - provider_id: faiss provider_type: inline::faiss From 0a12c5c3cdc2f16830a29ad7995bf7d3426f350b Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 08:18:52 -0500 Subject: [PATCH 21/69] removing uneeded global --- agentops/llms/llama_stack_client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index b9ed79ad3..25bb65ff9 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -207,8 +207,7 @@ def patched_function(*args, **kwargs): def _override_create_turn(self): from llama_stack_client.lib.agents.agent import Agent 
- global original_create_turn - original_create_turn = Agent.create_turn + self.original_create_turn = Agent.create_turn def patched_function(*args, **kwargs): # Call the original function with its original arguments @@ -216,7 +215,7 @@ def patched_function(*args, **kwargs): session = kwargs.get("session", None) if "session" in kwargs.keys(): del kwargs["session"] - result = original_create_turn(*args, **kwargs) + result = self.original_create_turn(*args, **kwargs) return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) # Override the original method with the patched one From 9a43d74cd95c8bcabd21984bfd33d8f883075dbf Mon Sep 17 00:00:00 2001 From: Teo Date: Sat, 30 Nov 2024 12:28:40 -0600 Subject: [PATCH 22/69] enhance(compose): remove deprecate version attr Signed-off-by: Teo --- examples/llama_stack_client_examples/docker-compose.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml index a4ed6e416..ae5362ab3 100644 --- a/examples/llama_stack_client_examples/docker-compose.yaml +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -1,5 +1,3 @@ -version: '3.8' - services: # Ollama server service ollama: From 13950fcd6e1b4efc5c4b64c82aad8e414df8bb81 Mon Sep 17 00:00:00 2001 From: Teo Date: Sat, 30 Nov 2024 12:42:36 -0600 Subject: [PATCH 23/69] Removing some redundancies Signed-off-by: Teo --- .../llama_stack_client_canary/agent_canary.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 5f54abeb0..b466f45d0 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -7,25 +7,18 @@ from llama_stack_client.types import Attachment from llama_stack_client.types.agent_create_params import AgentConfig -import os -import fire -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types.agent_create_params import AgentConfig - import agentops +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" + # import debugpy # debugpy.listen(5678) # debugpy.wait_for_client() -agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) +agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) -LLAMA_STACK_PORT = 5001 -INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" - async def agent_test(): client = LlamaStackClient( base_url=f"http://localhost:{LLAMA_STACK_PORT}", @@ -89,9 +82,6 @@ async def agent_test(): log.print() -def main(): - agentops.start_session() - asyncio.run(agent_test()) - agentops.end_session("Success") - -main() +agentops.start_session() +asyncio.run(agent_test()) +agentops.end_session("Success") From fe06a44e7381817acbecd0accef50306562c3405 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 14:18:33 -0500 Subject: [PATCH 24/69] saving tweak to custom docker-compose.yaml for llama stack --- .../llama_stack_client_examples/README.md | 2 +- .../docker-compose.yaml | 6 +- .../llama_stack_client_examples.ipynb | 63 
+++++++++++++--- .../llama_stack_ollama/README.md | 4 + .../llama_stack_ollama/compose.yaml | 73 +++++++++++++++++++ .../llama_stack_ollama/pull-models.sh | 18 +++++ .../llama_stack_ollama/run-with-safety.yaml | 62 ++++++++++++++++ .../llama_stack_ollama/run.yaml | 54 ++++++++++++++ 8 files changed, 267 insertions(+), 15 deletions(-) create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/README.md create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml create mode 100755 examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/run.yaml diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index f88666a1a..d560733f4 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -102,7 +102,7 @@ docker-compose -f docker-compose.yaml up ## Common Gotchas -1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:3b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` +1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:1b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` ## Useful ollama commands diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml index ae5362ab3..4ba388ab4 100644 --- a/examples/llama_stack_client_examples/docker-compose.yaml +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -17,7 +17,7 @@ services: - ollama # Ensure the Ollama server starts first entrypoint: > sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:1b-instruct-fp16\"}'" + curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" restart: "no" # Ensure this service doesn't restart @@ -27,7 +27,7 @@ services: - model_downloader # Ensure the Ollama server starts first entrypoint: > sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:1b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" + curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" restart: "no" # Ensure this service doesn't restart llama-stack: @@ -39,7 +39,7 @@ services: - "~/.ollama/models:/root/.ollama" - "./llama-stack-server-config.yaml:/root/my-run.yaml" environment: - - INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct + - INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct - OLLAMA_URL=http://ollama:11434 command: > --yaml-config /root/my-run.yaml diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index ccf768932..917a5a852 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,9 +17,16 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 59, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "41840.28s - pydevd: Sending message related to process being 
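To see the naming mismatch called out in the gotcha above, both servers can be asked for their model lists directly; the ports match the compose file, and the exact identifiers returned depend on which models have actually been pulled:

```python
import requests
from llama_stack_client import LlamaStackClient

# Ollama's native API reports names such as "llama3.2:3b-instruct-fp16" ...
tags = requests.get("http://localhost:11434/api/tags", timeout=10).json()
print("Ollama:", [m["name"] for m in tags.get("models", [])])

# ... while the Llama Stack distribution registers the same weights as
# "meta-llama/Llama-3.2-3B-Instruct".
client = LlamaStackClient(base_url="http://localhost:5001")
print("Llama Stack:", [m.identifier for m in client.models.list()])
```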
replaced timed-out after 5 seconds\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -56,7 +63,20 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "41847.06s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", @@ -70,7 +90,20 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "41853.46s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", @@ -94,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -123,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -133,9 +166,17 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: AgentOps has already been initialized. 
If you are trying to start a session, call agentops.start_session() instead.\n" + ] + } + ], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", @@ -216,14 +257,14 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=117bbe62-4f2d-4d33-bec9-ac9374ac6092\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=16acb375-5b2d-4c7d-a086-276a333ffad4\u001b[0m\u001b[0m\n" ] }, { @@ -232,7 +273,7 @@ "text": [ "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", "\n", - "\u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m" + "\u001b[0m\u001b[33m\"M\u001b[0m\u001b[33moon\u001b[0m\u001b[33m's\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m silver\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m\"\u001b[0m" ] }, { @@ -267,7 +308,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", " stream=True\n", ")\n", "\n", diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/README.md b/examples/llama_stack_client_examples/llama_stack_ollama/README.md new file mode 100644 index 000000000..751820d8a --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/README.md @@ -0,0 +1,4 @@ + +chmod +x pull_models.sh + +docker-compose -f compose.yaml up \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml b/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml new file mode 100644 index 000000000..9eff1970d --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml @@ -0,0 +1,73 @@ +services: + ollama: + image: ollama/ollama:latest + container_name: ollama + network_mode: ${NETWORK_MODE:-bridge} + volumes: + - ~/.ollama:/root/.ollama + ports: + - "11434:11434" + environment: + OLLAMA_DEBUG: 1 + command: [] + deploy: + resources: + limits: + memory: 8G # Set maximum memory + reservations: + memory: 8G # Set minimum memory reservation + # healthcheck: + # # ugh, no CURL in ollama image + # test: ["CMD", "curl", "-f", "http://ollama:11434"] + # interval: 10s + # timeout: 5s + # retries: 5 + + ollama-init: + image: ollama/ollama:latest + depends_on: + - ollama + # condition: service_healthy + network_mode: ${NETWORK_MODE:-bridge} + container_name: ollama-init + environment: + - OLLAMA_HOST=ollama + - INFERENCE_MODEL=${INFERENCE_MODEL} + - SAFETY_MODEL=${SAFETY_MODEL:-} + volumes: + - ~/.ollama:/root/.ollama + - ./pull-models.sh:/root/pull-models.sh + entrypoint: ["/root/pull-models.sh"] + + llamastack: + depends_on: + ollama: + condition: service_started + ollama-init: + condition: service_started + image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} + network_mode: ${NETWORK_MODE:-bridge} + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ~/local/llama-stack/:/app/llama-stack-source + - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml + ports: + - 
"${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" + environment: + - INFERENCE_MODEL=${INFERENCE_MODEL} + - SAFETY_MODEL=${SAFETY_MODEL:-} + - OLLAMA_URL=http://ollama:11434 + entrypoint: > + python -m llama_stack.distribution.server.server /root/my-run.yaml \ + --port ${LLAMA_STACK_PORT:-5001} + deploy: + restart_policy: + condition: on-failure + delay: 10s + max_attempts: 3 + window: 60s +volumes: + ollama: + ollama-init: + llamastack: \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh b/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh new file mode 100755 index 000000000..cd0690290 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +echo "Preloading (${INFERENCE_MODEL}, ${SAFETY_MODEL})..." +for model in ${INFERENCE_MODEL} ${SAFETY_MODEL}; do + echo "Preloading $model..." + if ! ollama run "$model"; then + echo "Failed to pull and run $model" + exit 1 + fi +done + +echo "All models pulled successfully" \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml b/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml new file mode 100644 index 000000000..2e4f6ac8a --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml @@ -0,0 +1,62 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: ollama + provider_model_id: null +shields: +- params: null + shield_id: ${env.SAFETY_MODEL} + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml b/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml new file mode 100644 index 000000000..32137fd67 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml @@ -0,0 +1,54 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + 
provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file From 65a5ab4fdcf310326f191d4b870d4f553591e3ea Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 15:08:22 -0500 Subject: [PATCH 25/69] saving solid docker-compose for spinning up ollama with a llama-stack --- .../llama_stack_client_examples/README.md | 2 + .../docker-compose.yaml | 68 +++++++++++++------ .../llama_stack_client_examples.ipynb | 56 ++++++++------- .../{llama_stack_ollama => }/pull-models.sh | 0 4 files changed, 78 insertions(+), 48 deletions(-) rename examples/llama_stack_client_examples/{llama_stack_ollama => }/pull-models.sh (100%) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index d560733f4..e114cea40 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -104,6 +104,8 @@ docker-compose -f docker-compose.yaml up 1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:1b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` +2. 
Docker will likely need more system memory resources allocated to it + ## Useful ollama commands - `ollama list` diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml index 4ba388ab4..76684f37b 100644 --- a/examples/llama_stack_client_examples/docker-compose.yaml +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -5,32 +5,66 @@ services: container_name: ollama_server ports: - "11434:11434" # Map Ollama's port to host + environment: + OLLAMA_DEBUG: 1 volumes: - ~/.ollama/models:/root/.ollama # Persist data (e.g., downloaded models) + deploy: + resources: + limits: + memory: 16G # Set maximum memory + reservations: + memory: 12G # Set minimum memory reservation entrypoint: ["ollama", "serve"] # Start the Ollama server restart: always # Ensure Ollama server restarts on failure + healthcheck: + # ugh, no CURL in ollama image + test: ["CMD", "curl", "-f", "http://ollama:11434"] + interval: 10s + timeout: 5s + retries: 5 # Ephemeral service to trigger model download - model_downloader: - image: curlimages/curl:latest # Use a lightweight image with curl + # model_downloader: + # image: curlimages/curl:latest # Use a lightweight image with curl + # depends_on: + # - ollama # Ensure the Ollama server starts first + # entrypoint: > + # sh -c "sleep 5 && + # curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" + # restart: "no" # Ensure this service doesn't restart + + ollama-init: + image: ollama/ollama:latest depends_on: - - ollama # Ensure the Ollama server starts first - entrypoint: > - sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" - restart: "no" # Ensure this service doesn't restart + ollama: + condition: service_started + network_mode: bridge + container_name: ollama-init + environment: + - OLLAMA_HOST=host.docker.internal + - INFERENCE_MODEL=llama3.2:3b-instruct-fp16 + volumes: + - ~/.ollama:/root/.ollama + - ./pull-models.sh:/root/pull-models.sh + entrypoint: ["/root/pull-models.sh"] - tester: - image: curlimages/curl:latest # Use a lightweight image with curl - depends_on: - - model_downloader # Ensure the Ollama server starts first - entrypoint: > - sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" - restart: "no" # Ensure this service doesn't restart + # tester: + # image: curlimages/curl:latest # Use a lightweight image with curl + # depends_on: + # - model_downloader # Ensure the Ollama server starts first + # entrypoint: > + # sh -c "sleep 5 && + # curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" + # restart: "no" # Ensure this service doesn't restart llama-stack: + depends_on: + ollama: + condition: service_started + ollama-init: + condition: service_started image: llamastack/distribution-ollama container_name: llama_stack_server ports: @@ -45,10 +79,6 @@ services: --yaml-config /root/my-run.yaml --port 5001 platform: linux/amd64 - depends_on: - - ollama - - model_downloader - - tester networks: default: diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 917a5a852..f790aac6b 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ 
b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,14 +17,14 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "41840.28s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + "44374.39s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" ] }, { @@ -70,7 +70,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "41847.06s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + "44382.44s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" ] }, { @@ -97,7 +97,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "41853.46s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + "44389.50s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" ] }, { @@ -127,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ @@ -166,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -199,14 +199,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f3dde247-e5bd-4d25-ab2a-08612270cb08\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=165f74ba-2f5d-42d3-957f-b6f175dc2471\u001b[0m\u001b[0m\n" ] }, { @@ -216,8 +216,8 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:199\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 199\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "Cell \u001b[0;32mIn[71], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-3B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:201\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 201\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers 
\u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m 
\u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1261\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1249\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1256\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1257\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1258\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1259\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 
1260\u001b[0m )\n\u001b[0;32m-> 1261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", @@ -232,20 +232,20 @@ } ], "source": [ - "# agentops.start_session()\n", - "# response = client.inference.chat_completion(\n", - "# messages=[\n", - "# UserMessage(\n", - "# content=\"write me a 3 word poem about the moon\",\n", - "# role=\"user\",\n", - "# ),\n", - "# ],\n", - "# model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - "# stream=False\n", - "# )\n", + "agentops.start_session()\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " stream=False\n", + ")\n", "\n", - "# print(f\"> Response: {response}\")\n", - "# agentops.end_session(\"Success\")" + "print(f\"> Response: {response}\")\n", + "agentops.end_session(\"Success\")" ] }, { @@ -257,23 +257,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=16acb375-5b2d-4c7d-a086-276a333ffad4\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5e22565f-ce52-4eba-9de7-65898f52afc1\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", - "\n", - "\u001b[0m\u001b[33m\"M\u001b[0m\u001b[33moon\u001b[0m\u001b[33m's\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m silver\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m\"\u001b[0m" + "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m beacon\u001b[0m\u001b[33m shines\u001b[0m\u001b[33m\"\u001b[0m" ] }, { diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh b/examples/llama_stack_client_examples/pull-models.sh similarity index 100% rename from examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh rename to examples/llama_stack_client_examples/pull-models.sh From 0114ede7dbda8da9bce6098214cf39c9f7d3e0ff Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 11:15:57 -0500 Subject: [PATCH 26/69] adding documentation for Llama Stack integration --- .gitignore | 4 +- README.md | 8 ++++ docs/mint.json | 1 + docs/v1/integrations/llama_stack.mdx | 66 ++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 docs/v1/integrations/llama_stack.mdx diff --git a/.gitignore b/.gitignore index 4db649aab..d6ab56734 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,6 @@ cython_debug/ .DS_Store 
agentops_time_travel.json -.agentops_time_travel.yaml \ No newline at end of file +.agentops_time_travel.yaml + +node_modules \ No newline at end of file diff --git a/README.md b/README.md index 264c5bc4a..e87981dfa 100644 --- a/README.md +++ b/README.md @@ -574,6 +574,14 @@ Check out the [LlamaIndex docs](https://docs.llamaindex.ai/en/stable/module_guid +### Llama Stack 🦙🥞 + +AgentOps provides support for Llama Stack Python Client(>=0.0.53), allowing you to monitor your Agentic applications. + +- [AgentOps integration example 1](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-fdddf65549f3714f8f007ce7dfd1cde720329fe54155d54389dd50fbd81813cb) +- [AgentOps integration example 2](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-6688ff4fb7ab1ce7b1cc9b8362ca27264a3060c16737fb1d850305787a6e3699) +- [Official Llama Stack Python Client](https://github.com/meta-llama/llama-stack-client-python) + ## Time travel debugging 🔮
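
For the "Llama Stack 🦙🥞" README section added above, here is a minimal usage sketch of the monitored pattern it describes, mirroring the example notebook in this PR (assumptions: a local Llama Stack server at `http://localhost:5001`, an `AGENTOPS_API_KEY` environment variable, and an illustrative model id — use whatever model your distribution actually serves):

```python
# Sketch only: monitor a Llama Stack chat completion with AgentOps.
import os

import agentops
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage

# auto_start_session=False lets us scope a single session around the inference call
agentops.init(
    os.getenv("AGENTOPS_API_KEY"),
    default_tags=["llama-stack-client-example"],
    auto_start_session=False,
)
client = LlamaStackClient(base_url="http://localhost:5001")

agentops.start_session()
response = client.inference.chat_completion(
    messages=[
        UserMessage(content="write me a 3 word poem about the moon", role="user"),
    ],
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # illustrative; must match a model registered with the server
    stream=False,
)
print(f"> Response: {response}")
agentops.end_session("Success")
```

After the run, AgentOps prints a clickable Session Replay URL to the console that links directly to the recorded session in the dashboard.
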
diff --git a/docs/mint.json b/docs/mint.json index 45e61b450..ddde98a84 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -93,6 +93,7 @@ "v1/integrations/cohere", "v1/integrations/anthropic", "v1/integrations/ollama", + "v1/integrations/llama_stack", "v1/integrations/litellm", "v1/integrations/multion", "v1/integrations/rest" diff --git a/docs/v1/integrations/llama_stack.mdx b/docs/v1/integrations/llama_stack.mdx new file mode 100644 index 000000000..163a4ca8e --- /dev/null +++ b/docs/v1/integrations/llama_stack.mdx @@ -0,0 +1,66 @@ +--- +title: 'Llama Stack' +description: '[Llama Stack](https://llama-stack.readthedocs.io/) is a framework for building Agentic applications.' +--- + +import CodeTooltip from '/snippets/add-code-tooltip.mdx' +import EnvTooltip from '/snippets/add-env-tooltip.mdx' + +AgentOps has built an integration with Llama Stack to make monitoring applications that leverage [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) simple. + +Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. + +## Adding AgentOps to Llama Stack applications + + + + + ```bash pip + pip install agentops + ``` + ```bash poetry + poetry add agentops + ``` + + + + + + + ```python python + import agentops + agentops.init() + ``` + + + + + + ```python .env + AGENTOPS_API_KEY= + ``` + + Read more about environment variables in [Advanced Configuration](/v1/usage/advanced-configuration) + + + + Execute your program and visit [app.agentops.ai/drilldown](https://app.agentops.ai/drilldown) to observe your waterfall! 🕵️ + + After your run, AgentOps prints a clickable url to console linking directly to your session in the Dashboard + + + + +## Llama Stack + AgentOps Examples + + + + + + + + + + + + From fa800999d56485e47a23684c8666caf3725cbd8a Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 11:41:39 -0500 Subject: [PATCH 27/69] rename compose.yaml files to follow the standard docker compose format --- .../README.fireworks.md | 9 --------- examples/llama_stack_client_examples/README.md | 4 ++-- ...{docker-compose.yaml => docker.compose.yaml} | 0 .../llama_stack_fireworks/README.fireworks.md | 17 +++++++++++++++++ .../fireworks-server-config.yaml | 0 .../fireworks.compose.yaml} | 0 6 files changed, 19 insertions(+), 11 deletions(-) delete mode 100644 examples/llama_stack_client_examples/README.fireworks.md rename examples/llama_stack_client_examples/{docker-compose.yaml => docker.compose.yaml} (100%) create mode 100644 examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md rename examples/llama_stack_client_examples/{ => llama_stack_fireworks}/fireworks-server-config.yaml (100%) rename examples/llama_stack_client_examples/{fireworks-compose.yaml => llama_stack_fireworks/fireworks.compose.yaml} (100%) diff --git a/examples/llama_stack_client_examples/README.fireworks.md b/examples/llama_stack_client_examples/README.fireworks.md deleted file mode 100644 index 412821953..000000000 --- a/examples/llama_stack_client_examples/README.fireworks.md +++ /dev/null @@ -1,9 +0,0 @@ -## - -https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml - -## - -```sh -docker-compose -f fireworks-server-config.yaml up -``` diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index e114cea40..3e88eea99 100644 --- 
a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -69,7 +69,7 @@ ie: Check out the examples in the `llama_stack_client_examples.ipynb` file ## Running the Ollama Server in a Docker Container ```sh - set up the ollama server -docker-compose -f docker-compose.yaml up +docker-compose -f docker.compose.yaml up ``` ```sh - download a model @@ -97,7 +97,7 @@ curl http://localhost:11434/api/chat -d '{ ## 2 - Running the Ollama Server in a Docker Container ```sh -docker-compose -f docker-compose.yaml up +docker-compose -f docker.compose.yaml up ``` ## Common Gotchas diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker.compose.yaml similarity index 100% rename from examples/llama_stack_client_examples/docker-compose.yaml rename to examples/llama_stack_client_examples/docker.compose.yaml diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md new file mode 100644 index 000000000..899bfbbeb --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md @@ -0,0 +1,17 @@ +# TLDR + +Here are the links of where to find Fireworks integration examples in the Llama Stack repository + +*Disclaimer: This has been tested but not shown to ever work end-2-end* + +## + +https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml +https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/fireworks + + +## + +```sh +docker-compose -f fireworks.compose.yaml up +``` diff --git a/examples/llama_stack_client_examples/fireworks-server-config.yaml b/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml similarity index 100% rename from examples/llama_stack_client_examples/fireworks-server-config.yaml rename to examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml diff --git a/examples/llama_stack_client_examples/fireworks-compose.yaml b/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml similarity index 100% rename from examples/llama_stack_client_examples/fireworks-compose.yaml rename to examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml From b1e433581f0643aaacf77868225b84bb4ba27b20 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 11:52:49 -0500 Subject: [PATCH 28/69] minor tweaks --- .../llama_stack_fireworks/README.fireworks.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md index 899bfbbeb..e8432b453 100644 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md +++ b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md @@ -9,7 +9,6 @@ Here are the links of where to find Fireworks integration examples in the Llama https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/fireworks - ## ```sh From dd27a377e26f46f342d145597d53edd387214178 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 12:07:21 -0500 Subject: [PATCH 29/69] add disclaimer in the Fireworks docker compose file --- .../llama_stack_fireworks/README.fireworks.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md index e8432b453..75c10f74b 100644 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md +++ b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md @@ -2,7 +2,7 @@ Here are the links of where to find Fireworks integration examples in the Llama Stack repository -*Disclaimer: This has been tested but not shown to ever work end-2-end* +*Disclaimer: This Llama Stack Server + Fireworks setup has been tested but NOT shown to ever work end-2-end* ## From 9c4ab6e28ad076d4d585724765d15b19b898ec6c Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 16:16:29 -0500 Subject: [PATCH 30/69] pushing for Alex --- agentops/llms/llama_stack_client.py | 49 +++- .../llama_stack_client_examples/README.md | 1 + .../llama_stack_client_examples.ipynb | 271 +++++++++--------- .../llama_stack_client_canary/agent_canary.py | 8 +- 4 files changed, 177 insertions(+), 152 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 25bb65ff9..db19a2ebc 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -1,7 +1,7 @@ import inspect import pprint import sys -from typing import Dict, Optional +from typing import Any, AsyncGenerator, Dict, Optional from agentops.event import LLMEvent, ErrorEvent, ToolEvent from agentops.session import Session @@ -15,6 +15,7 @@ class LlamaStackClientProvider(InstrumentedProvider): def __init__(self, client): + print("_!_!_ LlamaStackClientProvider _!_!_") super().__init__(client) self._provider_name = "LlamaStack" @@ -23,9 +24,13 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se try: accum_delta = None accum_tool_delta = None + tool_event = None + llm_event = None def handle_stream_chunk(chunk: dict): - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: llm_event.session_id = session.session_id @@ -72,19 +77,21 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - tool_event = ToolEvent(init_timestamp=init_timestamp, params=kwargs) + # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + + nonlocal llm_event if session is not None: llm_event.session_id = session.session_id - if llm_event.returns is None: + if getattr(llm_event, 'returns', None): llm_event.returns = chunk.event - try: if chunk.event.payload.event_type == "turn_start": pass elif chunk.event.payload.event_type == "step_start": + print("step_start") + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) pass elif chunk.event.payload.event_type == "step_progress": @@ -101,9 +108,15 @@ def handle_stream_agent(chunk: dict): elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): if (chunk.event.payload.tool_call_delta.parse_status == "started"): + print('ToolExecution - started') + nonlocal 
tool_event + tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) + tool_event.name = "ToolExecution - started" - self._safe_record(session, tool_event) + tool_event.init_timestamp = get_ISO_time() + # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + print('ToolExecution - progress') nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: @@ -111,14 +124,22 @@ def handle_stream_agent(chunk: dict): else: accum_tool_delta = delta elif (chunk.event.payload.tool_call_delta.parse_status == "success"): + print('ToolExecution - success') tool_event.name = "ToolExecution - success" tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) + tool_event.end_timestamp = get_ISO_time() + # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + tool_event.name = "ToolExecution - failure" + tool_event.end_timestamp = get_ISO_time() + print('ToolExecution - failure') + pass + # self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": + print("step_complete") if (chunk.event.payload.step_type == "inference"): + print("step_complete inference") llm_event.prompt = [ {"content": message['content'], "role": message['role']} for message in kwargs["messages"] ] @@ -130,6 +151,7 @@ def handle_stream_agent(chunk: dict): llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) elif (chunk.event.payload.step_type == "tool_execution"): + print('ToolExecution - complete') tool_event.name = "ToolExecution - complete" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) @@ -146,7 +168,6 @@ def handle_stream_agent(chunk: dict): f"chunk:\n {chunk}\n" f"kwargs:\n {kwargs_str}\n" ) - if kwargs.get("stream", False): def generator(): for chunk in response: @@ -159,6 +180,12 @@ async def async_generator(): handle_stream_agent(chunk) yield chunk return async_generator() + elif inspect.isgenerator(response): + async def async_generator(): + async for chunk in response: + handle_stream_agent(chunk) + yield chunk + return async_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: @@ -215,6 +242,7 @@ def patched_function(*args, **kwargs): session = kwargs.get("session", None) if "session" in kwargs.keys(): del kwargs["session"] + result = self.original_create_turn(*args, **kwargs) return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) @@ -223,6 +251,7 @@ def patched_function(*args, **kwargs): def override(self): + print("_!_!_ override _!_!_") self._override_complete() self._override_create_turn() diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 3e88eea99..784f853ee 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -121,3 +121,4 @@ docker-compose -f docker.compose.yaml up - download https://ollama.com/ - https://www.llama.com/docs/getting_the_models/meta/ - https://llama-stack.readthedocs.io/en/latest/getting_started/index.html +- 
https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index f790aac6b..384290cc8 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,21 +17,14 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 1, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "44374.39s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", @@ -63,20 +56,61 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "44382.44s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", + "Requirement already satisfied: blobfile in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.0)\n", + "Requirement already satisfied: fire in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.7.0)\n", + "Requirement already satisfied: httpx in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.27.2)\n", + "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.26.3)\n", + "Requirement already satisfied: llama-models>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", + "Requirement already satisfied: llama-stack-client>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", + "Requirement already satisfied: prompt-toolkit in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.48)\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (1.0.1)\n", + "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.10.1)\n", + "Requirement already satisfied: requests in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.32.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (13.9.4)\n", + "Requirement already satisfied: setuptools in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (75.6.0)\n", + "Requirement already satisfied: termcolor in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.5.0)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (6.0.2)\n", + "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (3.1.4)\n", + "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (0.8.0)\n", + "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (11.0.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.9.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (2.2.3)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (24.9.0)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.12.2)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (1.0.7)\n", + "Requirement already satisfied: idna in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (3.10)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", + "Requirement already satisfied: pycryptodomex>=3.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.21.0)\n", + "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (2.2.3)\n", + "Requirement already satisfied: lxml>=4.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (5.3.0)\n", + "Requirement already satisfied: filelock>=3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (23.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests->llama-stack) (3.4.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.56->llama-stack) (3.0.2)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", + "Requirement already satisfied: regex>=2022.1.18 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.56->llama-stack) (2024.11.6)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.56->llama-stack) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", @@ -90,32 +124,34 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "44389.50s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: fastapi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.115.5)\n", + "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (0.41.3)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (2.10.1)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", + "Requirement already 
satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.6.2.post1)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", "%pip install -U agentops\n", - "%pip install -U python-dotenv" + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n" ] }, { @@ -127,10 +163,11 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.inference.event_logger import EventLogger\n", "from llama_stack_client.types import UserMessage\n", @@ -139,46 +176,11 @@ "from dotenv import load_dotenv\n", "import os\n", "import agentops\n", - "import asyncio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ + "\n", "load_dotenv()\n", - "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: AgentOps has already been initialized. 
If you are trying to start a session, call agentops.start_session() instead.\n" - ] - } - ], - "source": [ - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + "\n", + "# agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", "port = 5001 # LLAMA_STACK_PORT\n", @@ -199,14 +201,14 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=165f74ba-2f5d-42d3-957f-b6f175dc2471\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5f22f2fd-2561-4b8d-8d8c-1ae875d8075c\u001b[0m\u001b[0m\n" ] }, { @@ -216,17 +218,17 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[71], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-3B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:201\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, 
**kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 201\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:207\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m 
kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 207\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1261\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1249\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1256\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1257\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1258\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1259\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1260\u001b[0m )\n\u001b[0;32m-> 1261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:953\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 
950\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 951\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 953\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 954\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 955\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File 
\u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1056\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1053\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1055\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1056\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1058\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1059\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1060\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1064\u001b[0m 
retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1065\u001b[0m )\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" ] } @@ -240,7 +242,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=False\n", ")\n", "\n", @@ -257,43 +259,31 @@ }, { "cell_type": "code", - 
"execution_count": 68, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5e22565f-ce52-4eba-9de7-65898f52afc1\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m beacon\u001b[0m\u001b[33m shines\u001b[0m\u001b[33m\"\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", + "\n", + "\u001b[0m\u001b[33m\"L\u001b[0m\u001b[33munar\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Could not end session - multiple sessions detected. You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.1s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" ] } ], @@ -306,7 +296,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=True\n", ")\n", "\n", @@ -318,34 +308,22 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_!_!_ LlamaStackClientProvider _!_!_\n", + "_!_!_ override _!_!_\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", - "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", - "response:\n", - " \n", - "kwargs:\n", - " {'messages': [{'content': 'Hello', 'role': 'user'}],\n", - " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", - "\n", - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. 
Start a session by calling agentops.start_session().\u001b[0m\n", - "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", - "response:\n", - " \n", - "kwargs:\n", - " {'messages': [{'content': 'Which players played in the winning team of the NBA '\n", - " 'western conference semifinals of 2024, please use '\n", - " 'tools',\n", - " 'role': 'user'}],\n", - " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", - "\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" ] }, { @@ -354,8 +332,16 @@ "text": [ "No available shields. Disable safety.\n", "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "Response: \n", - "Response: \n" + "response=.async_generator at 0x10ee067a0>\n", + "response=.async_generator at 0x10ee70900>\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" ] } ], @@ -366,6 +352,8 @@ "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", "\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", "# Apply nest_asyncio to handle nested event loops\n", "# nest_asyncio.apply()\n", "\n", @@ -376,7 +364,7 @@ "\n", "async def agent_test():\n", " client = LlamaStackClient(\n", - " base_url=f\"http://localhost:{LLAMA_STACK_PORT}\",\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", " )\n", "\n", " available_shields = [shield.identifier for shield in client.shields.list()]\n", @@ -415,7 +403,7 @@ " agent = Agent(client, agent_config)\n", " user_prompts = [\n", " \"Hello\",\n", - " \"Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", " ]\n", "\n", " session_id = agent.create_session(\"test-session\")\n", @@ -431,9 +419,16 @@ " session_id=session_id,\n", " )\n", "\n", - " print(\"Response: \", response)\n", + " print(f\"{response=}\")\n", + "\n", + " # async for log in EventLogger().log(response):\n", + " # log.print()\n", + "\n", + "agentops.start_session()\n", + "\n", + "await agent_test()\n", "\n", - "await agent_test()" + "agentops.end_session(\"Success\")" ] }, { diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index b466f45d0..ea0042d36 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -12,9 +12,9 @@ LLAMA_STACK_PORT = 5001 INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) @@ -60,7 +60,7 @@ async def agent_test(): agent = Agent(client, agent_config) 
user_prompts = [ "Hello", - "Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools", + "Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools", ] session_id = agent.create_session("test-session") From 978d4f055498cd9f82cbf7db21dc88f22425c821 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 19:53:32 -0500 Subject: [PATCH 31/69] saving changes to track Llama Stack Agent events with a stack data structure --- agentops/llms/llama_stack_client.py | 123 +++++++++++++++++----------- 1 file changed, 74 insertions(+), 49 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index db19a2ebc..3e94401fa 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -15,35 +15,35 @@ class LlamaStackClientProvider(InstrumentedProvider): def __init__(self, client): - print("_!_!_ LlamaStackClientProvider _!_!_") super().__init__(client) self._provider_name = "LlamaStack" def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: """Handle responses for LlamaStack""" try: + stack = [] accum_delta = None accum_tool_delta = None - tool_event = None - llm_event = None + # tool_event = None + # llm_event = None def handle_stream_chunk(chunk: dict): # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - llm_event.session_id = session.session_id + # if session is not None: + # llm_event.session_id = session.session_id # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - if llm_event.returns is None: - llm_event.returns = chunk.event + # if llm_event.returns is None: + # llm_event.returns = chunk.event try: nonlocal accum_delta - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = kwargs["model_id"] - llm_event.prompt = kwargs["messages"] + # llm_event.agent_id = check_call_stack_for_agent_id() + # llm_event.model = kwargs["model_id"] + # llm_event.prompt = kwargs["messages"] # NOTE: We assume for completion only choices[0] is relevant # chunk.event @@ -79,44 +79,53 @@ def handle_stream_agent(chunk: dict): # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - nonlocal llm_event + # nonlocal llm_event + nonlocal stack if session is not None: llm_event.session_id = session.session_id - if getattr(llm_event, 'returns', None): - llm_event.returns = chunk.event + # if getattr(llm_event, 'returns', None): + # llm_event.returns = chunk.event try: if chunk.event.payload.event_type == "turn_start": - pass + print("turn_start") + stack.append({ + 'event_type': chunk.event.payload.event_type, + 'event': None + }) elif chunk.event.payload.event_type == "step_start": print("step_start") - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - pass + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({ + 'event_type': chunk.event.payload.event_type, + 'event': llm_event + }) elif chunk.event.payload.event_type == "step_progress": - if 
(chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.prompt = kwargs["messages"] + # llm_event.agent_id = check_call_stack_for_agent_id() + # llm_event.prompt = kwargs["messages"] if accum_delta: accum_delta += delta else: accum_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): - + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): if (chunk.event.payload.tool_call_delta.parse_status == "started"): - print('ToolExecution - started') - nonlocal tool_event + print('tool_started') tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) + tool_event.name = "tool_started" + + stack.append({ + "event_type": "tool_started", + "event": tool_event + }) - tool_event.name = "ToolExecution - started" - tool_event.init_timestamp = get_ISO_time() # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - print('ToolExecution - progress') + # print('ToolExecution - in_progress') nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: @@ -125,40 +134,57 @@ def handle_stream_agent(chunk: dict): accum_tool_delta = delta elif (chunk.event.payload.tool_call_delta.parse_status == "success"): print('ToolExecution - success') - tool_event.name = "ToolExecution - success" - tool_event.params["completion"] = accum_tool_delta - tool_event.end_timestamp = get_ISO_time() - # self._safe_record(session, tool_event) + if stack[-1]['event_type'] == "tool_started": # check if the last event in the stack is a tool execution event + + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + # tool_event.name = "ToolExecution - success" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - tool_event.name = "ToolExecution - failure" - tool_event.end_timestamp = get_ISO_time() print('ToolExecution - failure') - pass - # self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + if stack[-1]['event_type'] == "ToolExecution - started": + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + # tool_event.name = "ToolExecution - failure" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": print("step_complete") if (chunk.event.payload.step_type == "inference"): + print("step_complete inference") - llm_event.prompt = [ - {"content": message['content'], "role": message['role']} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata.get("model_id", "Unable to identify model") - llm_event.prompt_tokens = None - llm_event.completion = accum_delta or kwargs["completion"] - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) + if stack[-1]['event_type'] == "step_start": # check if the last event in the stack is a step start event + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": 
message['content'], "role": message['role']} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion = accum_delta or kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) elif (chunk.event.payload.step_type == "tool_execution"): - print('ToolExecution - complete') - tool_event.name = "ToolExecution - complete" - tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) + if stack[-1]['event_type'] == "tool_started": + print('tool_complete') + tool_event = stack.pop().get("event") + tool_event.name = "tool_complete" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": + if stack[-1]['event_type'] == "turn_start": + print('turn_start') + # llm_event = stack.pop() + # llm_event.end_timestamp = get_ISO_time() + # self._safe_record(session, llm_event) pass except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) kwargs_str = pprint.pformat(kwargs) @@ -251,7 +277,6 @@ def patched_function(*args, **kwargs): def override(self): - print("_!_!_ override _!_!_") self._override_complete() self._override_create_turn() From d23564341b4626d355531b4982171965d120ddcb Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 19:59:34 -0500 Subject: [PATCH 32/69] removing commented code --- agentops/llms/llama_stack_client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 3e94401fa..688026e64 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -105,8 +105,6 @@ def handle_stream_agent(chunk: dict): if (chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response - # llm_event.agent_id = check_call_stack_for_agent_id() - # llm_event.prompt = kwargs["messages"] if accum_delta: accum_delta += delta From 3ee63ccc7b06c48ed715e63cdf49c4df5827e55a Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 20:31:03 -0500 Subject: [PATCH 33/69] tweak handle_stream_chunk in handle_response function of Llama Stack LLM provider to use a stack data structure --- agentops/llms/llama_stack_client.py | 45 ++++++++++++------- .../inference_canary.py | 6 +-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 688026e64..9dfb604dd 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -28,11 +28,8 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se # llm_event = None def handle_stream_chunk(chunk: dict): - # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - # if session is not None: - # llm_event.session_id = session.session_id + nonlocal stack # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks 
to build one full chat completion @@ -45,25 +42,41 @@ def handle_stream_chunk(chunk: dict): # llm_event.model = kwargs["model_id"] # llm_event.prompt = kwargs["messages"] - # NOTE: We assume for completion only choices[0] is relevant - # chunk.event - if chunk.event.event_type == "start": + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({ + 'event_type': "start", + 'event': llm_event + }) accum_delta = chunk.event.delta elif chunk.event.event_type == "progress": accum_delta += chunk.event.delta elif chunk.event.event_type == "complete": - llm_event.prompt = [ - {"content": message.content, "role": message.role} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.prompt_tokens = None - llm_event.completion = accum_delta - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) + if stack[-1]['event_type'] == "start": # check if the last event in the stack is a step start event + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion = accum_delta or kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + # llm_event.prompt = [ + # {"content": message.content, "role": message.role} for message in kwargs["messages"] + # ] + # llm_event.agent_id = check_call_stack_for_agent_id() + # llm_event.prompt_tokens = None + # llm_event.completion = accum_delta + # llm_event.completion_tokens = None + # llm_event.end_timestamp = get_ISO_time() + # self._safe_record(session, llm_event) except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) kwargs_str = pprint.pformat(kwargs) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py index 38dec66cc..02a86e914 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py @@ -11,9 +11,9 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT From 44494e13fb824843b00c72511cf78495cfc2ebfe Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 20:37:41 -0500 Subject: [PATCH 34/69] removing comments --- agentops/llms/llama_stack_client.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 9dfb604dd..226fc9506 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -65,16 +65,6 @@ def handle_stream_chunk(chunk: dict): llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) - # llm_event.prompt = [ - # {"content": message.content, "role": message.role} for message in 
kwargs["messages"] - # ] - # llm_event.agent_id = check_call_stack_for_agent_id() - # llm_event.prompt_tokens = None - # llm_event.completion = accum_delta - # llm_event.completion_tokens = None - # llm_event.end_timestamp = get_ISO_time() - # self._safe_record(session, llm_event) - except Exception as e: llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) From bcf22a8b8853cd49764db8105fb8206277899c63 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 21:10:33 -0500 Subject: [PATCH 35/69] inference_canary 1 and 2 now for clarity --- .../llama_stack_client_examples/README.md | 1 + ...erence_canary.py => inference_canary_1.py} | 13 ----- .../inference_canary_2.py | 57 +++++++++++++++++++ 3 files changed, 58 insertions(+), 13 deletions(-) rename tests/core_manual_tests/providers/llama_stack_client_canary/{inference_canary.py => inference_canary_1.py} (78%) create mode 100644 tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 784f853ee..9aeb16426 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -111,6 +111,7 @@ docker-compose -f docker.compose.yaml up - `ollama list` - `ollama help` - `ollama ps` +- `tail -f ~/.ollama/logs/server.log` ## Reference links used during development diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py similarity index 78% rename from tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py rename to tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index 02a86e914..3a1e95a20 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -1,5 +1,4 @@ import asyncio - import agentops import os from dotenv import load_dotenv @@ -42,18 +41,6 @@ async def stream_test(): def main(): agentops.start_session() - - # client.inference.chat_completion( - # messages=[ - # UserMessage( - # content="hello world, write me a 3 word poem about the moon", - # role="user", - # ), - # ], - # model_id="meta-llama/Llama-3.2-1B-Instruct", - # stream=False, - # ) - asyncio.run(stream_test()) agentops.end_session(end_state="Success") diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py new file mode 100644 index 000000000..ee1bcd9e7 --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -0,0 +1,57 @@ +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + 
base_url=f"{full_host}", +) + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=True, + ) + + async for log in EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + + client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, + ) + + agentops.end_session(end_state="Success") + +main() From a4fec782f56b547778b247ac04249ca1fd7da906 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 21:11:32 -0500 Subject: [PATCH 36/69] organizing canaries --- .../llama_stack_client_canary/inference_canary_1.py | 6 +++--- .../llama_stack_client_canary/inference_canary_2.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index 3a1e95a20..66717f898 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -10,9 +10,9 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index ee1bcd9e7..f18db96bf 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -9,9 +9,9 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT From 7319616be71e13fff64bd514afdcc659deb0f9ff Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 05:09:18 -0600 Subject: [PATCH 37/69] not a big deal --- agentops/llms/llama_stack_client.py | 29 ++-- .../llama_stack_client_examples/README.md | 153 ++++++------------ ...ks.compose.yaml => compose.fireworks.yaml} | 4 +- .../{llama_stack_ollama => }/compose.yaml | 45 ++++-- .../docker.compose.yaml | 85 ---------- ...rver-config.yaml => fireworks.config.yaml} | 0 .../llama-stack-server-config.yaml | 54 ------- .../llama_stack_fireworks/README.fireworks.md | 16 -- .../llama_stack_ollama/README.md | 4 - ...k_client_examples.ipynb => notebook.ipynb} | 10 ++ ...ith-safety.yaml => run-safety-shield.yaml} | 0 .../{llama_stack_ollama => }/run.yaml | 3 +- 12 files changed, 105 insertions(+), 298 deletions(-) rename examples/llama_stack_client_examples/{llama_stack_fireworks/fireworks.compose.yaml => compose.fireworks.yaml} (75%) rename examples/llama_stack_client_examples/{llama_stack_ollama => }/compose.yaml (56%) delete mode 100644 
examples/llama_stack_client_examples/docker.compose.yaml rename examples/llama_stack_client_examples/{llama_stack_fireworks/fireworks-server-config.yaml => fireworks.config.yaml} (100%) delete mode 100644 examples/llama_stack_client_examples/llama-stack-server-config.yaml delete mode 100644 examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md delete mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/README.md rename examples/llama_stack_client_examples/{llama_stack_client_examples.ipynb => notebook.ipynb} (99%) rename examples/llama_stack_client_examples/{llama_stack_ollama/run-with-safety.yaml => run-safety-shield.yaml} (100%) rename examples/llama_stack_client_examples/{llama_stack_ollama => }/run.yaml (96%) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 226fc9506..e5c2c1559 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -1,7 +1,9 @@ import inspect import pprint import sys -from typing import Any, AsyncGenerator, Dict, Optional +from typing import Any, AsyncGenerator, Dict, Optional, List +import logging +from typing import Union from agentops.event import LLMEvent, ErrorEvent, ToolEvent from agentops.session import Session @@ -92,13 +94,13 @@ def handle_stream_agent(chunk: dict): # llm_event.returns = chunk.event try: if chunk.event.payload.event_type == "turn_start": - print("turn_start") + logger.debug("turn_start") stack.append({ 'event_type': chunk.event.payload.event_type, 'event': None }) elif chunk.event.payload.event_type == "step_start": - print("step_start") + logger.debug("step_start") llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) stack.append({ 'event_type': chunk.event.payload.event_type, @@ -115,7 +117,7 @@ def handle_stream_agent(chunk: dict): accum_delta = delta elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): if (chunk.event.payload.tool_call_delta.parse_status == "started"): - print('tool_started') + logger.debug('tool_started') tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) tool_event.name = "tool_started" @@ -134,7 +136,7 @@ def handle_stream_agent(chunk: dict): else: accum_tool_delta = delta elif (chunk.event.payload.tool_call_delta.parse_status == "success"): - print('ToolExecution - success') + logger.debug('ToolExecution - success') if stack[-1]['event_type'] == "tool_started": # check if the last event in the stack is a tool execution event tool_event = stack.pop().get("event") @@ -143,7 +145,7 @@ def handle_stream_agent(chunk: dict): tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - print('ToolExecution - failure') + logger.warning('ToolExecution - failure') if stack[-1]['event_type'] == "ToolExecution - started": tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() @@ -152,11 +154,12 @@ def handle_stream_agent(chunk: dict): self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": - print("step_complete") + logger.debug("Step complete event received") + if (chunk.event.payload.step_type == "inference"): - - print("step_complete inference") - if stack[-1]['event_type'] == "step_start": # check if the last event in the stack is a step start event + logger.debug("Step complete inference") + + if 
stack[-1]['event_type'] == "step_start": llm_event = stack.pop().get("event") llm_event.prompt = [ {"content": message['content'], "role": message['role']} for message in kwargs["messages"] @@ -168,16 +171,18 @@ def handle_stream_agent(chunk: dict): llm_event.completion_tokens = None llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) + else: + logger.warning("Unexpected event stack state for inference step complete") elif (chunk.event.payload.step_type == "tool_execution"): if stack[-1]['event_type'] == "tool_started": - print('tool_complete') + logger.debug('tool_complete') tool_event = stack.pop().get("event") tool_event.name = "tool_complete" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": if stack[-1]['event_type'] == "turn_start": - print('turn_start') + logger.debug('turn_start') # llm_event = stack.pop() # llm_event.end_timestamp = get_ISO_time() # self._safe_record(session, llm_event) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 9aeb16426..9c6a4ca40 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -1,125 +1,66 @@ -# TLDR +# Llama Stack Client Examples -How to set up a Llama Stack server for supporting the `llama_stack_client_example.ipynb` examples +Run Llama Stack with Ollama - either local or containerized. -## Disclaimer +## Quick Start -As of 11/2024, Llama Stack is new and is subject to breaking changes. -Here are Llama Stack's docs: https://llama-stack.readthedocs.io/en/latest/ +Just run: -## ToC - -1. Running the Ollama Server and Llama Stack Server on the Host - - a) Download, install, & start Ollama - - b) Start the Llama Stack Server - - c) Call the Llama Stack Server with a Llama Stack Client -2. Running the Ollama Server in a Docker Container - -## Running the Ollama Server and Llama Stack Server on the Host - -### 1a - Download, install, & start Ollama - -https://ollama.com/ - -Ollama has an easy-to-use installer available for macOS, Linux, and Windows. - -```sh -export OLLAMA_INFERENCE_MODEL="llama3.2:1b-instruct-fp16" -ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m -ollama run llama3.2:1b --keepalive 60m -``` - -### 1b - Start the Llama Stack server - -You need to configure the Llama Stack server with a yaml config ie: peep the `llama-stack-server-config.yaml` file. FYI, found this config here: `https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml` - -```sh -export LLAMA_STACK_PORT=5001 -export INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" -docker run \ - -it \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ - llamastack/distribution-ollama \ - --yaml-config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 +```bash +./start-stack.sh ``` -```sh -docker run \ - -it \ - -p 5001:5001 \ - -v ~/.llama:/root/.llama \ - -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ - llamastack/distribution-ollama \ - --yaml-config /root/my-run.yaml \ - --port 5001 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-1B \ - --env OLLAMA_URL=http://host.docker.internal:11434 -``` +The script will: +1. 
Check if Ollama is already running locally +2. Check if Llama Stack server is already running +3. Guide you through what needs to be started +Example outputs: -### 1c - Call the Llama Stack Server with a Llama Stack Client +```bash +# Scenario 1: Ollama running locally +✓ Ollama server is running locally +✗ No Llama Stack server detected +Start Llama Stack server? [Y/n] -ie: Check out the examples in the `llama_stack_client_examples.ipynb` file - -## Running the Ollama Server in a Docker Container - -```sh - set up the ollama server -docker-compose -f docker.compose.yaml up +# Scenario 2: Nothing running +✗ No local Ollama server detected +✗ No Llama Stack server detected +No Ollama server detected. Start both Ollama and Llama Stack? [Y/n] ``` -```sh - download a model -curl -X POST http://localhost:11434/api/pull -d '{"model": "llama3.2:1b"}' -``` +## Environment Variables -```sh - test the model -curl http://localhost:11434/api/generate -d '{ - "model": "llama3.2:1b", - "prompt": "Why is the sky blue?" -}' - -curl http://localhost:11434/api/chat -d '{ - "model": "llama3.2:1b", - "messages": [ - { - "role": "user", - "content": "why is the sky blue?" - } - ], - "stream": false -}' -``` +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMA_STACK_PORT` | Server port | 5001 | +| `INFERENCE_MODEL` | Model ID | meta-llama/Llama-3.2-3B-Instruct | +| `SAFETY_MODEL` | Optional safety model | - | +| `NETWORK_MODE` | Docker network mode | auto-configured | +| `OLLAMA_URL` | Ollama API URL | auto-configured | -## 2 - Running the Ollama Server in a Docker Container +## Notes -```sh -docker-compose -f docker.compose.yaml up +``` +llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ +┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ +│ meta-llama/Llama-3.2-3B-Instruct │ ollama │ llama3.2:3b-instruct-fp16 │ │ +└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ ``` -## Common Gotchas - -1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:1b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` - -2. Docker will likely need more system memory resources allocated to it - -## Useful ollama commands +2. Docker needs sufficient memory allocation -- `ollama list` -- `ollama help` -- `ollama ps` -- `tail -f ~/.ollama/logs/server.log` +3. 
Ollama commands: + ```bash + ollama list + ollama help + ollama ps + ``` -## Reference links used during development +## References -- https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml -- https://llama-stack.readthedocs.io -- https://github.com/meta-llama/llama-stack-client-python -- https://github.com/meta-llama/llama-stack -- download https://ollama.com/ -- https://www.llama.com/docs/getting_the_models/meta/ -- https://llama-stack.readthedocs.io/en/latest/getting_started/index.html -- https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py \ No newline at end of file +- [Llama Stack Fireworks](./llama_stack_fireworks/README.fireworks.md) +- [Llama Stack Docs](https://llama-stack.readthedocs.io) +- [Ollama](https://ollama.com/) diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml b/examples/llama_stack_client_examples/compose.fireworks.yaml similarity index 75% rename from examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml rename to examples/llama_stack_client_examples/compose.fireworks.yaml index fcac78a29..4eb5aff0b 100644 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml +++ b/examples/llama_stack_client_examples/compose.fireworks.yaml @@ -4,10 +4,10 @@ services: network_mode: "host" volumes: - ~/.llama:/root/.llama - - ./run.yaml:/root/llamastack-run-fireworks.yaml + - ./run.yaml:/root/run.yaml ports: - "5000:5000" - entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-fireworks.yaml" + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/run.yaml" deploy: restart_policy: condition: on-failure diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml b/examples/llama_stack_client_examples/compose.yaml similarity index 56% rename from examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml rename to examples/llama_stack_client_examples/compose.yaml index 9eff1970d..d2add1198 100644 --- a/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -1,7 +1,6 @@ services: ollama: image: ollama/ollama:latest - container_name: ollama network_mode: ${NETWORK_MODE:-bridge} volumes: - ~/.ollama:/root/.ollama @@ -16,28 +15,28 @@ services: memory: 8G # Set maximum memory reservations: memory: 8G # Set minimum memory reservation - # healthcheck: - # # ugh, no CURL in ollama image - # test: ["CMD", "curl", "-f", "http://ollama:11434"] - # interval: 10s - # timeout: 5s - # retries: 5 + healthcheck: + # ̶u̶g̶h̶,̶ ̶n̶o̶ ̶C̶U̶R̶L̶ ̶i̶n̶ ̶o̶l̶l̶a̶m̶a̶ ̶i̶m̶a̶g̶e̶ + # - fine + test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: ollama\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect ollama:11434 2>/dev/null | grep \"HTTP/1.1 200\""] + interval: 10s + timeout: 5s + retries: 5 ollama-init: image: ollama/ollama:latest depends_on: - - ollama - # condition: service_healthy + ollama: + condition: service_healthy network_mode: ${NETWORK_MODE:-bridge} - container_name: ollama-init environment: - OLLAMA_HOST=ollama - INFERENCE_MODEL=${INFERENCE_MODEL} - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama - - ./pull-models.sh:/root/pull-models.sh - entrypoint: ["/root/pull-models.sh"] + - ./pull-models.sh:/pull-models.sh + entrypoint: ["/pull-models.sh"] llamastack: depends_on: @@ -51,7 +50,7 @@ services: - 
~/.llama:/root/.llama # Link to ollama run.yaml file - ~/local/llama-stack/:/app/llama-stack-source - - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml + - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/run.yaml ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" environment: @@ -59,15 +58,27 @@ services: - SAFETY_MODEL=${SAFETY_MODEL:-} - OLLAMA_URL=http://ollama:11434 entrypoint: > - python -m llama_stack.distribution.server.server /root/my-run.yaml \ - --port ${LLAMA_STACK_PORT:-5001} + python -m llama_stack.distribution.server.server --yaml-config /root/run.yaml --port ${LLAMA_STACK_PORT:-5001} deploy: restart_policy: condition: on-failure delay: 10s max_attempts: 3 window: 60s + notebook: + image: python:3.12 + depends_on: + llamastack: + condition: service_started + network_mode: ${NETWORK_MODE:-bridge} + volumes: + - ./notebook.ipynb:/app/notebook.ipynb + command: > + bash -c "pip install llama-stack-client jupyter nbconvert && + jupyter nbconvert --to python /app/notebook.ipynb && + python /app/notebook.py" + restart: "no" + volumes: - ollama: ollama-init: - llamastack: \ No newline at end of file + llamastack: diff --git a/examples/llama_stack_client_examples/docker.compose.yaml b/examples/llama_stack_client_examples/docker.compose.yaml deleted file mode 100644 index 76684f37b..000000000 --- a/examples/llama_stack_client_examples/docker.compose.yaml +++ /dev/null @@ -1,85 +0,0 @@ -services: - # Ollama server service - ollama: - image: ollama/ollama:latest - container_name: ollama_server - ports: - - "11434:11434" # Map Ollama's port to host - environment: - OLLAMA_DEBUG: 1 - volumes: - - ~/.ollama/models:/root/.ollama # Persist data (e.g., downloaded models) - deploy: - resources: - limits: - memory: 16G # Set maximum memory - reservations: - memory: 12G # Set minimum memory reservation - entrypoint: ["ollama", "serve"] # Start the Ollama server - restart: always # Ensure Ollama server restarts on failure - healthcheck: - # ugh, no CURL in ollama image - test: ["CMD", "curl", "-f", "http://ollama:11434"] - interval: 10s - timeout: 5s - retries: 5 - - # Ephemeral service to trigger model download - # model_downloader: - # image: curlimages/curl:latest # Use a lightweight image with curl - # depends_on: - # - ollama # Ensure the Ollama server starts first - # entrypoint: > - # sh -c "sleep 5 && - # curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" - # restart: "no" # Ensure this service doesn't restart - - ollama-init: - image: ollama/ollama:latest - depends_on: - ollama: - condition: service_started - network_mode: bridge - container_name: ollama-init - environment: - - OLLAMA_HOST=host.docker.internal - - INFERENCE_MODEL=llama3.2:3b-instruct-fp16 - volumes: - - ~/.ollama:/root/.ollama - - ./pull-models.sh:/root/pull-models.sh - entrypoint: ["/root/pull-models.sh"] - - - # tester: - # image: curlimages/curl:latest # Use a lightweight image with curl - # depends_on: - # - model_downloader # Ensure the Ollama server starts first - # entrypoint: > - # sh -c "sleep 5 && - # curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" - # restart: "no" # Ensure this service doesn't restart - - llama-stack: - depends_on: - ollama: - condition: service_started - ollama-init: - condition: service_started - image: llamastack/distribution-ollama - container_name: llama_stack_server - ports: - - "5001:5001" - volumes: - - "~/.ollama/models:/root/.ollama" - - 
"./llama-stack-server-config.yaml:/root/my-run.yaml" - environment: - - INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct - - OLLAMA_URL=http://ollama:11434 - command: > - --yaml-config /root/my-run.yaml - --port 5001 - platform: linux/amd64 - -networks: - default: - driver: bridge diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml b/examples/llama_stack_client_examples/fireworks.config.yaml similarity index 100% rename from examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml rename to examples/llama_stack_client_examples/fireworks.config.yaml diff --git a/examples/llama_stack_client_examples/llama-stack-server-config.yaml b/examples/llama_stack_client_examples/llama-stack-server-config.yaml deleted file mode 100644 index c51a454eb..000000000 --- a/examples/llama_stack_client_examples/llama-stack-server-config.yaml +++ /dev/null @@ -1,54 +0,0 @@ -version: '2' -image_name: ollama -docker_image: null -conda_env: ollama -apis: -- agents -- inference -- memory -- safety -- telemetry -providers: - inference: - - provider_id: ollama - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL} - memory: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: ollama - provider_model_id: null -shields: [] -memory_banks: [] -datasets: [] -scoring_fns: [] -eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md deleted file mode 100644 index 75c10f74b..000000000 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md +++ /dev/null @@ -1,16 +0,0 @@ -# TLDR - -Here are the links of where to find Fireworks integration examples in the Llama Stack repository - -*Disclaimer: This Llama Stack Server + Fireworks setup has been tested but NOT shown to ever work end-2-end* - -## - -https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml -https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/fireworks - -## - -```sh -docker-compose -f fireworks.compose.yaml up -``` diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/README.md b/examples/llama_stack_client_examples/llama_stack_ollama/README.md deleted file mode 100644 index 751820d8a..000000000 --- a/examples/llama_stack_client_examples/llama_stack_ollama/README.md +++ /dev/null @@ -1,4 +0,0 @@ - -chmod +x pull_models.sh - -docker-compose -f compose.yaml up \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/notebook.ipynb similarity index 99% rename from 
examples/llama_stack_client_examples/llama_stack_client_examples.ipynb rename to examples/llama_stack_client_examples/notebook.ipynb index 384290cc8..f2b2fc5c8 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/notebook.ipynb @@ -15,6 +15,16 @@ "First let's install the required packages" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "python -m venv .venv\n", + "source .venv/bin/activate" + ] + }, { "cell_type": "code", "execution_count": 1, diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml b/examples/llama_stack_client_examples/run-safety-shield.yaml similarity index 100% rename from examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml rename to examples/llama_stack_client_examples/run-safety-shield.yaml diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml b/examples/llama_stack_client_examples/run.yaml similarity index 96% rename from examples/llama_stack_client_examples/llama_stack_ollama/run.yaml rename to examples/llama_stack_client_examples/run.yaml index 32137fd67..63729c0a1 100644 --- a/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml +++ b/examples/llama_stack_client_examples/run.yaml @@ -46,9 +46,8 @@ models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: ollama - provider_model_id: null shields: [] memory_banks: [] datasets: [] scoring_fns: [] -eval_tasks: [] \ No newline at end of file +eval_tasks: [] From 034e25c25ce259ed7b40af475f0bf460686180e5 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 10:30:47 -0600 Subject: [PATCH 38/69] readme Signed-off-by: Teo --- .../llama_stack_client_examples/README.md | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 9c6a4ca40..54efd34f4 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -7,26 +7,7 @@ Run Llama Stack with Ollama - either local or containerized. Just run: ```bash -./start-stack.sh -``` - -The script will: -1. Check if Ollama is already running locally -2. Check if Llama Stack server is already running -3. Guide you through what needs to be started - -Example outputs: - -```bash -# Scenario 1: Ollama running locally -✓ Ollama server is running locally -✗ No Llama Stack server detected -Start Llama Stack server? [Y/n] - -# Scenario 2: Nothing running -✗ No local Ollama server detected -✗ No Llama Stack server detected -No Ollama server detected. Start both Ollama and Llama Stack? 
[Y/n] +docker compose up ``` ## Environment Variables From 796b6bc004f9b2fb4fc53eba0358e00acd9557f9 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 10:37:01 -0600 Subject: [PATCH 39/69] maintain filename standards under /root Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index d2add1198..3a17e9340 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -35,8 +35,8 @@ services: - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama - - ./pull-models.sh:/pull-models.sh - entrypoint: ["/pull-models.sh"] + - ./pull-models.sh:/root/pull-models.sh + entrypoint: ["/root/pull-models.sh"] llamastack: depends_on: From 3a8ca517cc0ec268a660545ccc5dbb4442ce294a Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 10:45:42 -0600 Subject: [PATCH 40/69] ollama: healthcheck on localhost rather; healthcheck relaxed Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 3a17e9340..3f3ea1a32 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -18,16 +18,15 @@ services: healthcheck: # ̶u̶g̶h̶,̶ ̶n̶o̶ ̶C̶U̶R̶L̶ ̶i̶n̶ ̶o̶l̶l̶a̶m̶a̶ ̶i̶m̶a̶g̶e̶ # - fine - test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: ollama\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect ollama:11434 2>/dev/null | grep \"HTTP/1.1 200\""] - interval: 10s + test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect localhost:11434 2>/dev/null | grep \"HTTP/1.1 200\""] + interval: 3s timeout: 5s retries: 5 ollama-init: image: ollama/ollama:latest depends_on: - ollama: - condition: service_healthy + - ollama network_mode: ${NETWORK_MODE:-bridge} environment: - OLLAMA_HOST=ollama @@ -40,10 +39,8 @@ services: llamastack: depends_on: - ollama: - condition: service_started - ollama-init: - condition: service_started + - ollama + - ollama-init image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} network_mode: ${NETWORK_MODE:-bridge} volumes: From 998231eb321795aaf69caa2d8974329da60a3c32 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:02:08 -0600 Subject: [PATCH 41/69] progress, api hitting, network ok Signed-off-by: Teo --- .../llama_stack_client_examples/compose.yaml | 58 ++++++++++--------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 3f3ea1a32..5a7bcb78f 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -1,33 +1,32 @@ services: ollama: + hostname: ollama + # extra_hosts: + # - "host.docker.internal:host-gateway" image: ollama/ollama:latest - network_mode: ${NETWORK_MODE:-bridge} volumes: - ~/.ollama:/root/.ollama - ports: - - "11434:11434" environment: OLLAMA_DEBUG: 1 command: [] deploy: resources: limits: - memory: 8G # Set maximum memory + memory: 4G # Set maximum memory reservations: - memory: 8G # Set minimum memory reservation + memory: 2G # Set minimum memory 
reservation healthcheck: - # ̶u̶g̶h̶,̶ ̶n̶o̶ ̶C̶U̶R̶L̶ ̶i̶n̶ ̶o̶l̶l̶a̶m̶a̶ ̶i̶m̶a̶g̶e̶ - # - fine - test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect localhost:11434 2>/dev/null | grep \"HTTP/1.1 200\""] + test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n\" | nc localhost 11434 | grep -q \"HTTP/1.1 200\""] interval: 3s timeout: 5s retries: 5 + networks: + - ollama-network ollama-init: image: ollama/ollama:latest depends_on: - ollama - network_mode: ${NETWORK_MODE:-bridge} environment: - OLLAMA_HOST=ollama - INFERENCE_MODEL=${INFERENCE_MODEL} @@ -36,13 +35,16 @@ services: - ~/.ollama:/root/.ollama - ./pull-models.sh:/root/pull-models.sh entrypoint: ["/root/pull-models.sh"] + networks: + - ollama-network llamastack: depends_on: - - ollama - - ollama-init + ollama: + condition: service_healthy + ollama-init: + condition: service_started image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} - network_mode: ${NETWORK_MODE:-bridge} volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file @@ -62,20 +64,24 @@ services: delay: 10s max_attempts: 3 window: 60s - notebook: - image: python:3.12 - depends_on: - llamastack: - condition: service_started - network_mode: ${NETWORK_MODE:-bridge} - volumes: - - ./notebook.ipynb:/app/notebook.ipynb - command: > - bash -c "pip install llama-stack-client jupyter nbconvert && - jupyter nbconvert --to python /app/notebook.ipynb && - python /app/notebook.py" - restart: "no" - + # notebook: + # image: python:3.12 + # depends_on: + # llamastack: + # condition: service_started + # network_mode: ${NETWORK_MODE:-bridge} + # volumes: + # - ./notebook.ipynb:/app/notebook.ipynb + # command: > + # bash -c "pip install llama-stack-client jupyter nbconvert && + # jupyter nbconvert --to python /app/notebook.ipynb && + # python /app/notebook.py" + # restart: "no" +networks: + llama-stack: + driver: bridge + ollama-network: + driver: bridge volumes: ollama-init: llamastack: From e6d2200634ba009b6afb9e3834d625ceb6f670bc Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:02:08 -0600 Subject: [PATCH 42/69] Is this path relevant? 
Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 5a7bcb78f..836918456 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -48,7 +48,7 @@ services: volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file - - ~/local/llama-stack/:/app/llama-stack-source + # - ~/local/llama-stack/:/app/llama-stack-source - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/run.yaml ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" From eef3730b253d3e667d7b0339d7a760b0d5b109f9 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:05:22 -0600 Subject: [PATCH 43/69] INFERENCE_MODEL default Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 836918456..9b6baa654 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -29,7 +29,7 @@ services: - ollama environment: - OLLAMA_HOST=ollama - - INFERENCE_MODEL=${INFERENCE_MODEL} + - INFERENCE_MODEL=${INFERENCE_MODEL:-llama3.2:3b-instruct-fp16} - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama From 537f95047b8e9ed3d7ea638dbfe2a92a830ecf94 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:07:55 -0600 Subject: [PATCH 44/69] unused network Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 9b6baa654..326922f26 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -78,8 +78,6 @@ services: # python /app/notebook.py" # restart: "no" networks: - llama-stack: - driver: bridge ollama-network: driver: bridge volumes: From 0c2d9d9dfb3bbce51fa71d22f956c206d9efebe9 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:16:04 -0600 Subject: [PATCH 45/69] host:port instead of URL in run.yaml Signed-off-by: Teo --- examples/llama_stack_client_examples/run.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/run.yaml b/examples/llama_stack_client_examples/run.yaml index 63729c0a1..fd5fb95c6 100644 --- a/examples/llama_stack_client_examples/run.yaml +++ b/examples/llama_stack_client_examples/run.yaml @@ -13,7 +13,8 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + host: ollama + port: 11434 memory: - provider_id: faiss provider_type: inline::faiss From 68218d26ca4c534b2fee2077f10dd7f9fd52b398 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:26:25 -0600 Subject: [PATCH 46/69] seems fixed Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 326922f26..526678bc9 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -1,8 +1,8 @@ services: ollama: hostname: ollama - 
# extra_hosts: - # - "host.docker.internal:host-gateway" + extra_hosts: + - "host.docker.internal:host-gateway" image: ollama/ollama:latest volumes: - ~/.ollama:/root/.ollama @@ -16,7 +16,7 @@ services: reservations: memory: 2G # Set minimum memory reservation healthcheck: - test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n\" | nc localhost 11434 | grep -q \"HTTP/1.1 200\""] + test: ["CMD", "bash", "-c", " Date: Tue, 3 Dec 2024 11:33:19 -0600 Subject: [PATCH 47/69] on non Apple silicon: must try between llamastack-local-cpu and distirbution-ollama images Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 526678bc9..08fc88f53 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -44,8 +44,7 @@ services: condition: service_healthy ollama-init: condition: service_started - image: ${LLAMA_STACK_IMAGE:-llamastack/llamastack-local-cpu} - platform: linux/arm64 + image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file From 3f7fc6800c1b09402c382d7ccd3a903f8a0036c5 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 12:01:10 -0600 Subject: [PATCH 48/69] providers.config.url | ollama HOST Signed-off-by: Teo --- examples/llama_stack_client_examples/run.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/llama_stack_client_examples/run.yaml b/examples/llama_stack_client_examples/run.yaml index fd5fb95c6..4d148ad95 100644 --- a/examples/llama_stack_client_examples/run.yaml +++ b/examples/llama_stack_client_examples/run.yaml @@ -13,8 +13,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - host: ollama - port: 11434 + url: ${env.OLLAMA_URL:http://ollama:11434} memory: - provider_id: faiss provider_type: inline::faiss From 95878e253d7b6b0780938182c8a1eb80d47568aa Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 12:33:28 -0600 Subject: [PATCH 49/69] save Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 08fc88f53..6ef8d394c 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -53,7 +53,7 @@ services: ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" environment: - - INFERENCE_MODEL=${INFERENCE_MODEL} + - INFERENCE_MODEL=${INFERENCE_MODEL:-} # ? 
- SAFETY_MODEL=${SAFETY_MODEL:-} - OLLAMA_URL=http://ollama:11434 entrypoint: > From 42e4c4fee1ffc4bb8b6fc30c9b3a3bd83123d646 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 13:11:34 -0600 Subject: [PATCH 50/69] env.tpl Signed-off-by: Teo --- examples/llama_stack_client_examples/.env.tpl | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 examples/llama_stack_client_examples/.env.tpl diff --git a/examples/llama_stack_client_examples/.env.tpl b/examples/llama_stack_client_examples/.env.tpl new file mode 100644 index 000000000..06ef2065d --- /dev/null +++ b/examples/llama_stack_client_examples/.env.tpl @@ -0,0 +1,5 @@ +INFERENCE_MODEL=meta-llama/Llama-3.2-1B +OLLAMA_MODEL=llama3.2:1b-instruct-fp16 + + + From a3a81fcf90a0a43f5ab67b16bb734ebdf25ca875 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 13:11:44 -0600 Subject: [PATCH 51/69] right configs Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 6ef8d394c..d542d93e7 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -29,7 +29,7 @@ services: - ollama environment: - OLLAMA_HOST=ollama - - INFERENCE_MODEL=${INFERENCE_MODEL:-llama3.2:latest} + - INFERENCE_MODEL=${OLLAMA_MODEL:-llama3.2:3b-instruct-fp16} - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama @@ -43,7 +43,7 @@ services: ollama: condition: service_healthy ollama-init: - condition: service_started + condition: service_completed_successfully image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} volumes: - ~/.llama:/root/.llama @@ -53,7 +53,7 @@ services: ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" environment: - - INFERENCE_MODEL=${INFERENCE_MODEL:-} # ? + - INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct} # ? - SAFETY_MODEL=${SAFETY_MODEL:-} - OLLAMA_URL=http://ollama:11434 entrypoint: > From ebb2ea1566b2692868d0a381032e2debb6276509 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 13:18:29 -0600 Subject: [PATCH 52/69] DONE Signed-off-by: Teo --- .../llama_stack_client_examples/README.md | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 54efd34f4..d558f7e2c 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -15,19 +15,25 @@ docker compose up | Variable | Description | Default | |----------|-------------|---------| | `LLAMA_STACK_PORT` | Server port | 5001 | -| `INFERENCE_MODEL` | Model ID | meta-llama/Llama-3.2-3B-Instruct | +| `INFERENCE_MODEL` | Model ID (must match Llama Stack format) | meta-llama/Llama-3.2-1B-Instruct | +| `OLLAMA_MODEL` | Ollama model ID (must match Ollama format) | llama3.2:1b-instruct-fp16 | +| ⚠️ **Important:** | The model IDs must match their respective formats - Ollama and Llama Stack use different naming conventions for the same models | - | | `SAFETY_MODEL` | Optional safety model | - | | `NETWORK_MODE` | Docker network mode | auto-configured | | `OLLAMA_URL` | Ollama API URL | auto-configured | -## Notes +## Common Gotchas + +1. Model naming conventions differ between Ollama and Llama Stack. 
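As a rough illustration (the values mirror the `.env.tpl` and compose defaults used elsewhere in this series, so treat them as an example rather than required settings), the two naming schemes can be pinned side by side in the environment that `docker compose up` reads:

```bash
# Llama Stack naming, consumed by the llamastack service
INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
# Ollama naming for the same weights, consumed by ollama-init
OLLAMA_MODEL=llama3.2:1b-instruct-fp16
```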
The same model is referenced differently - for instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama. + +2. Ensure Docker has sufficient system memory allocation to run properly ``` llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ ┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ -│ meta-llama/Llama-3.2-3B-Instruct │ ollama │ llama3.2:3b-instruct-fp16 │ │ +│ meta-llama/Llama-3.2-1B-Instruct │ ollama │ llama3.2:1b-instruct-fp16 │ │ └──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ ``` @@ -42,6 +48,14 @@ llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list ## References +- [Download Ollama](https://ollama.com/) - [Llama Stack Fireworks](./llama_stack_fireworks/README.fireworks.md) - [Llama Stack Docs](https://llama-stack.readthedocs.io) -- [Ollama](https://ollama.com/) +- [Ollama Run YAML Template](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml) +- [Llama Stack Documentation](https://llama-stack.readthedocs.io) +- [Llama Stack Client Python](https://github.com/meta-llama/llama-stack-client-python) +- [Llama Stack Repository](https://github.com/meta-llama/llama-stack) +- [Meta Models Documentation](https://www.llama.com/docs/getting_the_models/meta/) +- [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) +- [Agents Example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py) +- [Model Download Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) From 77a98a3d29c7d159bb60c1492626daa1222c16cb Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 3 Dec 2024 18:36:52 -0500 Subject: [PATCH 53/69] pushing enhancement before merge --- agentops/llms/llama_stack_client.py | 2 +- .../compose.fireworks.yaml | 16 -- .../fireworks.config.yaml | 59 ----- .../notebook.ipynb | 205 +++++++++--------- .../llama_stack_client_canary/agent_canary.py | 6 +- .../inference_canary_2.py | 4 +- tests/llama_stack/test_llama_stack.py | 80 +++---- 7 files changed, 141 insertions(+), 231 deletions(-) delete mode 100644 examples/llama_stack_client_examples/compose.fireworks.yaml delete mode 100644 examples/llama_stack_client_examples/fireworks.config.yaml diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index e5c2c1559..ab2f1e356 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -60,7 +60,7 @@ def handle_stream_chunk(chunk: dict): {"content": message.content, "role": message.role} for message in kwargs["messages"] ] llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.model = kwargs["model_id"] llm_event.prompt_tokens = None llm_event.completion = accum_delta or kwargs["completion"] llm_event.completion_tokens = None diff --git a/examples/llama_stack_client_examples/compose.fireworks.yaml b/examples/llama_stack_client_examples/compose.fireworks.yaml deleted file mode 100644 index 4eb5aff0b..000000000 --- a/examples/llama_stack_client_examples/compose.fireworks.yaml +++ /dev/null @@ -1,16 +0,0 @@ -services: - llamastack: - image: 
llamastack/distribution-fireworks - network_mode: "host" - volumes: - - ~/.llama:/root/.llama - - ./run.yaml:/root/run.yaml - ports: - - "5000:5000" - entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/run.yaml" - deploy: - restart_policy: - condition: on-failure - delay: 3s - max_attempts: 5 - window: 60s \ No newline at end of file diff --git a/examples/llama_stack_client_examples/fireworks.config.yaml b/examples/llama_stack_client_examples/fireworks.config.yaml deleted file mode 100644 index cb9dd2cbc..000000000 --- a/examples/llama_stack_client_examples/fireworks.config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -version: '2' -image_name: fireworks -docker_image: null -conda_env: fireworks -apis: -- agents -- inference -- memory -- safety -- telemetry -providers: - inference: - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: "https://api.fireworks.ai/inference" - api_key: "" - memory: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db -models: -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: null - provider_model_id: fireworks/llama-v3p2-1b-instruct -shields: -- params: null - shield_id: meta-llama/Llama-Guard-3-8B - provider_id: null - provider_shield_id: null -memory_banks: [] -datasets: [] -scoring_fns: [] -eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/notebook.ipynb b/examples/llama_stack_client_examples/notebook.ipynb index f2b2fc5c8..4f8b96e68 100644 --- a/examples/llama_stack_client_examples/notebook.ipynb +++ b/examples/llama_stack_client_examples/notebook.ipynb @@ -19,22 +19,31 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (2472932708.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[1], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m python -m venv .venv\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], "source": [ - "python -m venv .venv\n", - "source .venv/bin/activate" + "# python -m venv .venv\n", + "# source .venv/bin/activate" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", "Requirement already 
satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", @@ -67,13 +76,13 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", + "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", "Requirement already satisfied: blobfile in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.0)\n", "Requirement already satisfied: fire in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.7.0)\n", "Requirement already satisfied: httpx in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.27.2)\n", "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.26.3)\n", - "Requirement already satisfied: llama-models>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", - "Requirement already satisfied: llama-stack-client>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", + "Requirement already satisfied: llama-models>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", + "Requirement already satisfied: llama-stack-client>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.48)\n", "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (1.0.1)\n", "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.10.1)\n", @@ -81,18 +90,18 @@ "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (13.9.4)\n", "Requirement already satisfied: setuptools in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (75.6.0)\n", "Requirement already satisfied: termcolor in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.5.0)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) 
(3.1.4)\n", - "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (11.0.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (2.2.3)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (24.9.0)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.12.2)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (6.0.2)\n", + "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (3.1.4)\n", + "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (0.8.0)\n", + "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (11.0.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (1.9.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (2.2.3)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (24.9.0)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
llama-stack-client>=0.0.57->llama-stack) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.12.2)\n", "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (2024.8.30)\n", "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (1.0.7)\n", "Requirement already satisfied: idna in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (3.10)\n", @@ -110,13 +119,13 @@ "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (2.18.0)\n", "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.56->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.56->llama-stack) (2024.11.6)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.56->llama-stack) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.57->llama-stack) (3.0.2)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.57->llama-stack) (2024.11.6)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.57->llama-stack) (1.16.0)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", @@ -152,6 +161,27 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", + "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (4.12.2)\n", + "Requirement already 
satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -161,7 +191,9 @@ "%pip install -U llama-stack\n", "%pip install -U agentops\n", "%pip install -U python-dotenv\n", - "%pip install -U fastapi\n" + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" ] }, { @@ -173,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +222,7 @@ "load_dotenv()\n", "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", "\n", - "# agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", "port = 5001 # LLAMA_STACK_PORT\n", @@ -206,40 +238,36 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Completion Example" + "# Inference Canary 1 - Completion with Streaming" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5f22f2fd-2561-4b8d-8d8c-1ae875d8075c\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" ] }, { - "ename": "InternalServerError", - "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:207\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 207\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 
275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", - "File 
\u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in 
\u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", - "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m 
poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", + "\n", + "\u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[33m of\u001b[0m\u001b[33m night\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" ] } ], @@ -248,15 +276,17 @@ "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", - " content=\"write me a 3 word poem about the moon\",\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", " role=\"user\",\n", " ),\n", " ],\n", " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - " stream=False\n", + " stream=True\n", ")\n", "\n", - "print(f\"> Response: {response}\")\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", "agentops.end_session(\"Success\")" ] }, @@ -264,76 +294,49 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Completion with Streaming Example" + "# Inference Canary Example 2 - Completion without Streaming" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", - "\n", - "\u001b[0m\u001b[33m\"L\u001b[0m\u001b[33munar\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.1s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", - " content=\"hello world, write me a 3 word poem about the moon\",\n", + " content=\"write me a 3 word poem about the moon\",\n", " role=\"user\",\n", " ),\n", " ],\n", " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - " stream=True\n", + " stream=False\n", ")\n", "\n", - "async for log in EventLogger().log(response):\n", - " log.print()\n", - "\n", + "print(f\"> Response: {response}\")\n", "agentops.end_session(\"Success\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Agent Canary Example" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "_!_!_ LlamaStackClientProvider _!_!_\n", - "_!_!_ override _!_!_\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" + "🖇 AgentOps: AgentOps has already been initialized. If you are trying to start a session, call agentops.start_session() instead.\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" ] }, { @@ -342,16 +345,16 @@ "text": [ "No available shields. Disable safety.\n", "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.async_generator at 0x10ee067a0>\n", - "response=.async_generator at 0x10ee70900>\n" + "response=.async_generator at 0x1304e75a0>\n", + "response=.async_generator at 0x1304e7140>\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" ] } ], diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index ea0042d36..73513cfa5 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -12,9 +12,9 @@ LLAMA_STACK_PORT = 5001 INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index f18db96bf..b4a6aea3f 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -41,7 +41,7 @@ async def stream_test(): def main(): agentops.start_session() - client.inference.chat_completion( + response = client.inference.chat_completion( messages=[ UserMessage( content="hello world, write me a 3 word poem about the moon", @@ -51,6 +51,8 @@ def main(): model_id="meta-llama/Llama-3.2-1B-Instruct", stream=False, ) + + print(response) agentops.end_session(end_state="Success") diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py index b93a18567..47b80e767 100644 --- a/tests/llama_stack/test_llama_stack.py +++ b/tests/llama_stack/test_llama_stack.py @@ -1,57 +1,37 @@ -import pytest -import requests_mock -import time - from llama_stack_client 
import LlamaStackClient from llama_stack_client.types import UserMessage from llama_stack_client.lib.inference.event_logger import EventLogger - - -@pytest.fixture(autouse=True) -def setup_teardown(): - yield - - -@pytest.fixture(autouse=True, scope="function") -def mock_req(): - with requests_mock.Mocker() as m: - url = "http://localhost:5001" - m.post(url + "/v2/create_events", json={"status": "ok"}) - m.post(url + "/v2/create_session", json={"status": "success", "jwt": "some_jwt"}) - - yield m - +from unittest.mock import MagicMock class TestLlamaStack: def setup_method(self): - - print("...Setting up LlamaStackClient...") - - host = "0.0.0.0" # LLAMA_STACK_HOST - port = 5001 # LLAMA_STACK_PORT - - full_host = f"http://{host}:{port}" - - self.client = LlamaStackClient( - base_url=f"{full_host}", - ) - - - def test_llama_stack_inference(self, mock_req): - - response = self.client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-1B-Instruct", - stream=False, + self.client = LlamaStackClient() + self.client.inference = MagicMock() + self.client.inference.chat_completion = MagicMock(return_value=[ + { + "choices": [ + { + "message": { + "content": "Moonlight whispers softly", + "role": "assistant", + } + } + ] + } + ]) + + + def test_llama_stack_inference(self): + self.client.inference.chat_completion.assert_not_called() + self.client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, ) - - # async for log in EventLogger().log(response): - # log.print() - - print(response) - + self.client.inference.chat_completion.assert_called_once() + \ No newline at end of file From c77339e36cf9754091a9e86afb89fe741ae10c12 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 18:01:40 +0530 Subject: [PATCH 54/69] add `Instruct` to model name --- examples/llama_stack_client_examples/.env.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/.env.tpl b/examples/llama_stack_client_examples/.env.tpl index 06ef2065d..5099720e1 100644 --- a/examples/llama_stack_client_examples/.env.tpl +++ b/examples/llama_stack_client_examples/.env.tpl @@ -1,4 +1,4 @@ -INFERENCE_MODEL=meta-llama/Llama-3.2-1B +INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct OLLAMA_MODEL=llama3.2:1b-instruct-fp16 From 2539b7b2c7d5fe64d0398552033ed98e04d4a55b Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 18:02:08 +0530 Subject: [PATCH 55/69] clean and increase memory --- .../llama_stack_client_examples/compose.yaml | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index d542d93e7..2139a6620 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -12,9 +12,9 @@ services: deploy: resources: limits: - memory: 4G # Set maximum memory + memory: 8G reservations: - memory: 2G # Set minimum memory reservation + memory: 4G healthcheck: test: ["CMD", "bash", "-c", " @@ -64,19 +62,6 @@ services: delay: 10s max_attempts: 3 window: 60s - # notebook: - # image: python:3.12 - # depends_on: - # llamastack: - # condition: service_started - # network_mode: ${NETWORK_MODE:-bridge} - # volumes: 
- # - ./notebook.ipynb:/app/notebook.ipynb - # command: > - # bash -c "pip install llama-stack-client jupyter nbconvert && - # jupyter nbconvert --to python /app/notebook.ipynb && - # python /app/notebook.py" - # restart: "no" networks: - ollama-network From d541d436f60a2d21ac99bb43abfa52b5d9a50101 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:16:42 +0530 Subject: [PATCH 56/69] test cleanup --- .../providers/llama_stack_client_canary/agent_canary.py | 4 ++-- .../llama_stack_client_canary/inference_canary_1.py | 4 ---- .../llama_stack_client_canary/inference_canary_2.py | 6 +----- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 73513cfa5..ab6399cbc 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -78,8 +78,8 @@ async def agent_test(): print("Response: ", response) - async for log in EventLogger().log(response): - log.print() + # for log in EventLogger().log(response): + # log.print() agentops.start_session() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index 66717f898..afbe48ff8 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -10,10 +10,6 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() - host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index b4a6aea3f..03a3f51ab 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -9,10 +9,6 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() - host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT @@ -52,7 +48,7 @@ def main(): stream=False, ) - print(response) + print(response.completion_message.content) agentops.end_session(end_state="Success") From f9a6b0700f5dc09a7fef05b415aba8c1c9d85016 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:18:09 +0530 Subject: [PATCH 57/69] clean notebook --- .../notebook.ipynb | 245 +----------------- 1 file changed, 13 insertions(+), 232 deletions(-) diff --git a/examples/llama_stack_client_examples/notebook.ipynb b/examples/llama_stack_client_examples/notebook.ipynb index 4f8b96e68..6dda032f3 100644 --- a/examples/llama_stack_client_examples/notebook.ipynb +++ b/examples/llama_stack_client_examples/notebook.ipynb @@ -19,173 +19,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (2472932708.py, line 1)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m Cell \u001b[0;32mIn[1], line 
1\u001b[0;36m\u001b[0m\n\u001b[0;31m python -m venv .venv\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" - ] - } - ], - "source": [ - "# python -m venv .venv\n", - "# source .venv/bin/activate" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", - "Requirement already satisfied: blobfile in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.0)\n", - "Requirement already satisfied: fire in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.7.0)\n", - "Requirement already satisfied: httpx in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.27.2)\n", - "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.26.3)\n", - "Requirement already satisfied: llama-models>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", - "Requirement already satisfied: llama-stack-client>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.48)\n", - "Requirement already satisfied: 
python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (1.0.1)\n", - "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.10.1)\n", - "Requirement already satisfied: requests in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.32.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (13.9.4)\n", - "Requirement already satisfied: setuptools in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (75.6.0)\n", - "Requirement already satisfied: termcolor in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.5.0)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (3.1.4)\n", - "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (11.0.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (2.2.3)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (24.9.0)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.12.2)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (1.0.7)\n", - "Requirement already satisfied: idna in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.21.0)\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (2.2.3)\n", - "Requirement already satisfied: lxml>=4.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (5.3.0)\n", - "Requirement already satisfied: filelock>=3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (23.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests->llama-stack) (3.4.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (2.18.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.57->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.57->llama-stack) (2024.11.6)\n", - "Requirement already 
satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.57->llama-stack) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", - "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", - "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", - "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", - "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", - "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: fastapi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.115.5)\n", - "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (0.41.3)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (2.10.1)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.6.2.post1)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -243,34 +77,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", - "\n", - "\u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[33m of\u001b[0m\u001b[33m night\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -315,7 +124,7 @@ " stream=False\n", ")\n", "\n", - "print(f\"> Response: {response}\")\n", + "print(f\"> Response: {response.completion_message.content}\")\n", "agentops.end_session(\"Success\")" ] }, @@ -328,36 +137,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, 
"metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: AgentOps has already been initialized. If you are trying to start a session, call agentops.start_session() instead.\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.async_generator at 0x1304e75a0>\n", - "response=.async_generator at 0x1304e7140>\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from llama_stack_client import LlamaStackClient\n", @@ -365,15 +147,12 @@ "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", "\n", - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", - "\n", - "# Apply nest_asyncio to handle nested event loops\n", - "# nest_asyncio.apply()\n", + "agentops.start_session()\n", "\n", "LLAMA_STACK_PORT = 5001\n", "\n", "# Replace with actual API keys for functionality\n", - "os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"your-brave-search-api-key\"\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", "\n", "async def agent_test():\n", " client = LlamaStackClient(\n", @@ -404,7 +183,7 @@ " {\n", " \"type\": \"brave_search\",\n", " \"engine\": \"brave\",\n", - " \"api_key\": os.getenv(\"BRAVE_SEARCH_API_KEY\"),\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", " }\n", " ],\n", " tool_choice=\"auto\",\n", @@ -434,7 +213,7 @@ "\n", " print(f\"{response=}\")\n", "\n", - " # async for log in EventLogger().log(response):\n", + " # for log in EventLogger().log(response):\n", " # log.print()\n", "\n", "agentops.start_session()\n", @@ -449,7 +228,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "agentops.end_all_sessions()" + ] } ], "metadata": { From 51847e7d0e31f76e638476d17ec36916d375d466 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:20:58 +0530 Subject: [PATCH 58/69] updated examples readme and notebook renamed --- .../llama_stack_client_examples/README.md | 28 ++++--------------- ...tebook.ipynb => llama_stack_example.ipynb} | 0 2 files changed, 6 insertions(+), 22 deletions(-) rename examples/llama_stack_client_examples/{notebook.ipynb => llama_stack_example.ipynb} (100%) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index d558f7e2c..c838096d5 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -1,15 +1,17 @@ # Llama Stack Client Examples -Run Llama Stack with Ollama - either local or containerized. +The example notebook demonstrates how to use the Llama Stack Client to monitor an Agentic application using AgentOps. 
We have also provided a `compose.yaml` file to run Ollama in a container. ## Quick Start -Just run: +First run the following command to start the Ollama server with the Llama Stack client: ```bash docker compose up ``` +Next, run the [notebook](./llama_stack_example.ipynb) to see the waterfall visualization in the [AgentOps](https://app.agentops.ai) dashboard. + ## Environment Variables | Variable | Description | Default | @@ -17,34 +19,16 @@ docker compose up | `LLAMA_STACK_PORT` | Server port | 5001 | | `INFERENCE_MODEL` | Model ID (must match Llama Stack format) | meta-llama/Llama-3.2-1B-Instruct | | `OLLAMA_MODEL` | Ollama model ID (must match Ollama format) | llama3.2:1b-instruct-fp16 | -| ⚠️ **Important:** | The model IDs must match their respective formats - Ollama and Llama Stack use different naming conventions for the same models | - | | `SAFETY_MODEL` | Optional safety model | - | | `NETWORK_MODE` | Docker network mode | auto-configured | | `OLLAMA_URL` | Ollama API URL | auto-configured | ## Common Gotchas -1. Model naming conventions differ between Ollama and Llama Stack. The same model is referenced differently - for instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama. - -2. Ensure Docker has sufficient system memory allocation to run properly - -``` -llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ -┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ -│ meta-llama/Llama-3.2-1B-Instruct │ ollama │ llama3.2:1b-instruct-fp16 │ │ -└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ -``` +1. Model naming conventions differ between Ollama and Llama Stack. The same model is referenced differently. For instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama. -2. Docker needs sufficient memory allocation +2. Ensure Docker is configured with sufficient system memory allocation to run properly. -3. Ollama commands: - ```bash - ollama list - ollama help - ollama ps - ``` ## References diff --git a/examples/llama_stack_client_examples/notebook.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb similarity index 100% rename from examples/llama_stack_client_examples/notebook.ipynb rename to examples/llama_stack_client_examples/llama_stack_example.ipynb From 81651fab31a654db5c0a37b160b4c158245ae0dc Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:30:30 +0530 Subject: [PATCH 59/69] updated docs --- docs/v1/integrations/llama_stack.mdx | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/docs/v1/integrations/llama_stack.mdx b/docs/v1/integrations/llama_stack.mdx index 163a4ca8e..bb0f9a83c 100644 --- a/docs/v1/integrations/llama_stack.mdx +++ b/docs/v1/integrations/llama_stack.mdx @@ -1,14 +1,14 @@ --- title: 'Llama Stack' -description: '[Llama Stack](https://llama-stack.readthedocs.io/) is a framework for building Agentic applications.' +description: 'Llama Stack is a framework from Meta AI for building Agentic applications.' 
--- import CodeTooltip from '/snippets/add-code-tooltip.mdx' import EnvTooltip from '/snippets/add-env-tooltip.mdx' -AgentOps has built an integration with Llama Stack to make monitoring applications that leverage [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) simple. +AgentOps integrates with Llama Stack via its Python [client](https://github.com/meta-llama/llama-stack-client-python) to provide observability into applications that leverage it. -Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. +Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. You can use this guide to set up the Llama Stack server and client, or alternatively use our Docker [compose](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/docker-compose.yml) file. ## Adding AgentOps to Llama Stack applications @@ -23,7 +23,17 @@ Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io ``` - + + + ```bash pip + pip install llama-stack-client + ``` + ```bash poetry + poetry add llama-stack-client + ``` + + + @@ -51,12 +61,9 @@ Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io -## Llama Stack + AgentOps Examples +## Examples - - - - +An example notebook is available [here](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/llama_stack_example.ipynb) to showcase how to use the Llama Stack client with AgentOps. From d76b784ad590c2c18f240a75f672a8ff08cbccdb Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:32:51 +0530 Subject: [PATCH 60/69] clean integration code --- agentops/llms/llama_stack_client.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index ab2f1e356..19429999a 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -34,15 +34,9 @@ def handle_stream_chunk(chunk: dict): nonlocal stack # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - # if llm_event.returns is None: - # llm_event.returns = chunk.event try: nonlocal accum_delta - # llm_event.agent_id = check_call_stack_for_agent_id() - # llm_event.model = kwargs["model_id"] - # llm_event.prompt = kwargs["messages"] if chunk.event.event_type == "start": llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) @@ -81,8 +75,6 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) # nonlocal llm_event nonlocal stack @@ -90,8 +82,6 @@ def handle_stream_agent(chunk: dict): if session is not None: llm_event.session_id = session.session_id - # if getattr(llm_event, 'returns', None): - # llm_event.returns = chunk.event try: if chunk.event.payload.event_type == "turn_start":
logger.debug("turn_start") @@ -126,9 +116,7 @@ def handle_stream_agent(chunk: dict): "event": tool_event }) - # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - # print('ToolExecution - in_progress') nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: @@ -141,7 +129,6 @@ def handle_stream_agent(chunk: dict): tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() - # tool_event.name = "ToolExecution - success" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): @@ -149,7 +136,6 @@ def handle_stream_agent(chunk: dict): if stack[-1]['event_type'] == "ToolExecution - started": tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() - # tool_event.name = "ToolExecution - failure" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) @@ -183,9 +169,6 @@ def handle_stream_agent(chunk: dict): elif chunk.event.payload.event_type == "turn_complete": if stack[-1]['event_type'] == "turn_start": logger.debug('turn_start') - # llm_event = stack.pop() - # llm_event.end_timestamp = get_ISO_time() - # self._safe_record(session, llm_event) pass except Exception as e: From cdf33a7da5e1f7aeacfd74c25ab9930f6d5e1edc Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:34:28 +0530 Subject: [PATCH 61/69] linting --- agentops/llms/llama_stack_client.py | 119 ++++++++++++++++------------ 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 19429999a..6de8e0530 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -11,16 +11,18 @@ from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id from agentops.llms.instrumented_provider import InstrumentedProvider + class LlamaStackClientProvider(InstrumentedProvider): original_complete = None original_create_turn = None - def __init__(self, client): super().__init__(client) self._provider_name = "LlamaStack" - def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {} + ) -> dict: """Handle responses for LlamaStack""" try: stack = [] @@ -30,7 +32,6 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se # llm_event = None def handle_stream_chunk(chunk: dict): - nonlocal stack # NOTE: prompt/completion usage not returned in response when streaming @@ -40,16 +41,15 @@ def handle_stream_chunk(chunk: dict): if chunk.event.event_type == "start": llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) - stack.append({ - 'event_type': "start", - 'event': llm_event - }) + stack.append({"event_type": "start", "event": llm_event}) accum_delta = chunk.event.delta elif chunk.event.event_type == "progress": accum_delta += chunk.event.delta elif chunk.event.event_type == "complete": - if stack[-1]['event_type'] == "start": # check if the last event in the stack is a step start event - llm_event = stack.pop().get("event") + if ( + stack[-1]["event_type"] == "start" + ): # check if the last event in the stack is a 
step start event + llm_event = stack.pop().get("event") llm_event.prompt = [ {"content": message.content, "role": message.role} for message in kwargs["messages"] ] @@ -75,7 +75,7 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming - + # nonlocal llm_event nonlocal stack @@ -85,19 +85,16 @@ def handle_stream_agent(chunk: dict): try: if chunk.event.payload.event_type == "turn_start": logger.debug("turn_start") - stack.append({ - 'event_type': chunk.event.payload.event_type, - 'event': None - }) + stack.append({"event_type": chunk.event.payload.event_type, "event": None}) elif chunk.event.payload.event_type == "step_start": logger.debug("step_start") llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) - stack.append({ - 'event_type': chunk.event.payload.event_type, - 'event': llm_event - }) + stack.append({"event_type": chunk.event.payload.event_type, "event": llm_event}) elif chunk.event.payload.event_type == "step_progress": - if (chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): + if ( + chunk.event.payload.step_type == "inference" + and chunk.event.payload.text_delta_model_response + ): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response @@ -105,50 +102,54 @@ def handle_stream_agent(chunk: dict): accum_delta += delta else: accum_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): - if (chunk.event.payload.tool_call_delta.parse_status == "started"): - logger.debug('tool_started') + elif chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta: + if chunk.event.payload.tool_call_delta.parse_status == "started": + logger.debug("tool_started") tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) tool_event.name = "tool_started" - stack.append({ - "event_type": "tool_started", - "event": tool_event - }) + stack.append({"event_type": "tool_started", "event": tool_event}) - elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + elif chunk.event.payload.tool_call_delta.parse_status == "in_progress": nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: accum_tool_delta += delta else: accum_tool_delta = delta - elif (chunk.event.payload.tool_call_delta.parse_status == "success"): - logger.debug('ToolExecution - success') - if stack[-1]['event_type'] == "tool_started": # check if the last event in the stack is a tool execution event - + elif chunk.event.payload.tool_call_delta.parse_status == "success": + logger.debug("ToolExecution - success") + if ( + stack[-1]["event_type"] == "tool_started" + ): # check if the last event in the stack is a tool execution event tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) - elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - logger.warning('ToolExecution - failure') - if stack[-1]['event_type'] == "ToolExecution - started": + self._safe_record(session, tool_event) + elif chunk.event.payload.tool_call_delta.parse_status == "failure": + logger.warning("ToolExecution - failure") + if stack[-1]["event_type"] == "ToolExecution - started": tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() tool_event.params["completion"] = accum_tool_delta - 
self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + self._safe_record( + session, + ErrorEvent( + trigger_event=tool_event, exception=Exception("ToolExecution - failure") + ), + ) elif chunk.event.payload.event_type == "step_complete": logger.debug("Step complete event received") - - if (chunk.event.payload.step_type == "inference"): + + if chunk.event.payload.step_type == "inference": logger.debug("Step complete inference") - - if stack[-1]['event_type'] == "step_start": - llm_event = stack.pop().get("event") + + if stack[-1]["event_type"] == "step_start": + llm_event = stack.pop().get("event") llm_event.prompt = [ - {"content": message['content'], "role": message['role']} for message in kwargs["messages"] + {"content": message["content"], "role": message["role"]} + for message in kwargs["messages"] ] llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = metadata.get("model_id", "Unable to identify model") @@ -159,16 +160,16 @@ def handle_stream_agent(chunk: dict): self._safe_record(session, llm_event) else: logger.warning("Unexpected event stack state for inference step complete") - elif (chunk.event.payload.step_type == "tool_execution"): - if stack[-1]['event_type'] == "tool_started": - logger.debug('tool_complete') + elif chunk.event.payload.step_type == "tool_execution": + if stack[-1]["event_type"] == "tool_started": + logger.debug("tool_complete") tool_event = stack.pop().get("event") tool_event.name = "tool_complete" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": - if stack[-1]['event_type'] == "turn_start": - logger.debug('turn_start') + if stack[-1]["event_type"] == "turn_start": + logger.debug("turn_start") pass except Exception as e: @@ -183,23 +184,30 @@ def handle_stream_agent(chunk: dict): f"chunk:\n {chunk}\n" f"kwargs:\n {kwargs_str}\n" ) + if kwargs.get("stream", False): + def generator(): for chunk in response: handle_stream_chunk(chunk) yield chunk + return generator() elif inspect.isasyncgen(response): + async def async_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk + return async_generator() elif inspect.isgenerator(response): + async def async_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk + return async_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) @@ -209,7 +217,9 @@ async def async_generator(): llm_event.returns = response llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = kwargs["model_id"] - llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] llm_event.prompt_tokens = None llm_event.completion = response.completion_message.content llm_event.completion_tokens = None @@ -257,14 +267,19 @@ def patched_function(*args, **kwargs): session = kwargs.get("session", None) if "session" in kwargs.keys(): del kwargs["session"] - + result = self.original_create_turn(*args, **kwargs) - return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) + return self.handle_response( + result, + kwargs, + init_timestamp, + session=session, + metadata={"model_id": args[0].agent_config.get("model")}, + ) # Override the original method with the 
patched one Agent.create_turn = patched_function - def override(self): self._override_complete() self._override_create_turn() @@ -272,8 +287,10 @@ def override(self): def undo_override(self): if self.original_complete is not None: from llama_stack_client.resources import InferenceResource + InferenceResource.chat_completion = self.original_complete if self.original_create_turn is not None: from llama_stack_client.lib.agents.agent import Agent + Agent.create_turn = self.original_create_turn From e66c878be63b463669156631203f02ff6f66f9d6 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:37:08 +0530 Subject: [PATCH 62/69] linting tests --- .../inference_canary_1.py | 2 + .../inference_canary_2.py | 4 +- tests/llama_stack/test_llama_stack.py | 43 ++++++++++--------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index afbe48ff8..c88dfa48c 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -19,6 +19,7 @@ base_url=f"{full_host}", ) + async def stream_test(): response = client.inference.chat_completion( messages=[ @@ -40,4 +41,5 @@ def main(): asyncio.run(stream_test()) agentops.end_session(end_state="Success") + main() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index 03a3f51ab..7c43ce510 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -18,6 +18,7 @@ base_url=f"{full_host}", ) + async def stream_test(): response = client.inference.chat_completion( messages=[ @@ -49,7 +50,8 @@ def main(): ) print(response.completion_message.content) - + agentops.end_session(end_state="Success") + main() diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py index 47b80e767..4e5868de0 100644 --- a/tests/llama_stack/test_llama_stack.py +++ b/tests/llama_stack/test_llama_stack.py @@ -3,35 +3,36 @@ from llama_stack_client.lib.inference.event_logger import EventLogger from unittest.mock import MagicMock + class TestLlamaStack: def setup_method(self): self.client = LlamaStackClient() self.client.inference = MagicMock() - self.client.inference.chat_completion = MagicMock(return_value=[ - { - "choices": [ - { - "message": { - "content": "Moonlight whispers softly", - "role": "assistant", + self.client.inference.chat_completion = MagicMock( + return_value=[ + { + "choices": [ + { + "message": { + "content": "Moonlight whispers softly", + "role": "assistant", + } } - } - ] - } - ]) - + ] + } + ] + ) def test_llama_stack_inference(self): self.client.inference.chat_completion.assert_not_called() self.client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-1B-Instruct", - stream=False, + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, ) self.client.inference.chat_completion.assert_called_once() - \ No newline at end of file From 
efeffd011b260dde2d63511143f581397ed1f404 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 7 Dec 2024 20:50:59 -0500 Subject: [PATCH 63/69] fix generator bug --- agentops/llms/llama_stack_client.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 6de8e0530..be7815c3f 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -24,6 +24,7 @@ def handle_response( self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {} ) -> dict: """Handle responses for LlamaStack""" + try: stack = [] accum_delta = None @@ -194,21 +195,19 @@ def generator(): return generator() elif inspect.isasyncgen(response): - - async def async_generator(): + async def agent_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk - return async_generator() + return agent_generator() elif inspect.isgenerator(response): - - async def async_generator(): - async for chunk in response: + def agent_generator(): + for chunk in response: handle_stream_agent(chunk) yield chunk - return async_generator() + return agent_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: From 27db3df82286cb26a7864014b0ebff01de6ed925 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 7 Dec 2024 23:49:56 -0500 Subject: [PATCH 64/69] saving working agent_canary test - works in Python notebook AND in script --- .../llama_stack_example.ipynb | 414 +++++++++++++++++- .../llama_stack_example_for_ci.ipynb | 398 +++++++++++++++++ .../llama_stack_client_canary/agent_canary.py | 24 +- 3 files changed, 802 insertions(+), 34 deletions(-) create mode 100644 examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb diff --git a/examples/llama_stack_client_examples/llama_stack_example.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb index 6dda032f3..621b692c0 100644 --- a/examples/llama_stack_client_examples/llama_stack_example.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_example.ipynb @@ -17,9 +17,206 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.57)\n", + "Collecting llama-stack-client\n", + " Downloading llama_stack_client-0.0.58-py3-none-any.whl.metadata (15 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", + "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
rich->llama-stack-client) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack-client) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.17.0)\n", + "Downloading llama_stack_client-0.0.58-py3-none-any.whl (286 kB)\n", + "Installing collected packages: llama-stack-client\n", + " Attempting uninstall: llama-stack-client\n", + " Found existing installation: llama_stack_client 0.0.57\n", + " Uninstalling llama_stack_client-0.0.57:\n", + " Successfully uninstalled llama_stack_client-0.0.57\n", + "Successfully installed llama-stack-client-0.0.58\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting llama-stack\n", + " Downloading llama_stack-0.0.58-py3-none-any.whl.metadata (12 kB)\n", + "Collecting blobfile (from llama-stack)\n", + " Using cached blobfile-3.0.0-py3-none-any.whl.metadata (15 kB)\n", + "Collecting fire (from llama-stack)\n", + " Using cached fire-0.7.0.tar.gz (87 kB)\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: httpx in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.28.0)\n", + "Collecting huggingface-hub (from llama-stack)\n", + " Downloading huggingface_hub-0.26.5-py3-none-any.whl.metadata (13 kB)\n", + "Collecting llama-models>=0.0.58 (from llama-stack)\n", + " Downloading llama_models-0.0.58-py3-none-any.whl.metadata (8.2 kB)\n", + "Requirement already satisfied: llama-stack-client>=0.0.58 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.0.58)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (3.0.48)\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (1.0.1)\n", + "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.10.3)\n", + "Requirement already satisfied: requests in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.32.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (13.9.4)\n", + "Requirement already satisfied: setuptools in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (57.4.0)\n", + "Requirement already satisfied: termcolor in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.5.0)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (6.0.2)\n", + "Collecting jinja2 (from llama-models>=0.0.58->llama-stack)\n", + " Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)\n", + 
"Collecting tiktoken (from llama-models>=0.0.58->llama-stack)\n", + " Downloading tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)\n", + "Collecting Pillow (from llama-models>=0.0.58->llama-stack)\n", + " Using cached pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.1 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.7.0)\n", + "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.9.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (2.2.3)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (24.9.0)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.12.2)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (1.0.7)\n", + "Requirement already satisfied: idna in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (3.10)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", + "Collecting pycryptodomex>=3.8 (from blobfile->llama-stack)\n", + " Using cached pycryptodomex-3.21.0-cp36-abi3-macosx_10_9_universal2.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (2.2.3)\n", + "Collecting lxml>=4.9 (from blobfile->llama-stack)\n", + " Downloading lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (3.8 kB)\n", + "Collecting filelock>=3.0 (from blobfile->llama-stack)\n", + " Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n", + "Collecting fsspec>=2023.5.0 (from huggingface-hub->llama-stack)\n", + " Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: 
packaging>=20.9 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from huggingface-hub->llama-stack) (23.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests->llama-stack) (3.4.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (2.18.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.58->llama-stack) (1.2.2)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", + "Collecting MarkupSafe>=2.0 (from jinja2->llama-models>=0.0.58->llama-stack)\n", + " Using cached MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.0 kB)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", + "Collecting regex>=2022.1.18 (from tiktoken->llama-models>=0.0.58->llama-stack)\n", + " Using cached regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl.metadata (40 kB)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.58->llama-stack) (1.17.0)\n", + "Downloading llama_stack-0.0.58-py3-none-any.whl (446 kB)\n", + "Downloading llama_models-0.0.58-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached blobfile-3.0.0-py3-none-any.whl (75 kB)\n", + "Downloading huggingface_hub-0.26.5-py3-none-any.whl (447 kB)\n", + "Using cached filelock-3.16.1-py3-none-any.whl (16 kB)\n", + "Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB)\n", + "Downloading lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl (8.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.1/8.1 MB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n", + "\u001b[?25hUsing cached pycryptodomex-3.21.0-cp36-abi3-macosx_10_9_universal2.whl (2.5 MB)\n", + "Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n", + "Using cached 
pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl (3.0 MB)\n", + "Downloading tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl (982 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m982.4/982.4 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl (12 kB)\n", + "Using cached regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl (284 kB)\n", + "Building wheels for collected packages: fire\n", + " Building wheel for fire (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=28249a5b845d2594cddd5e302164aa8818158be391c1a1b5f0ae4d10c50bd63c\n", + " Stored in directory: /Users/a/Library/Caches/pip/wheels/19/39/2f/2d3cadc408a8804103f1c34ddd4b9f6a93497b11fa96fe738e\n", + "Successfully built fire\n", + "Installing collected packages: regex, pycryptodomex, Pillow, MarkupSafe, lxml, fsspec, fire, filelock, tiktoken, jinja2, huggingface-hub, blobfile, llama-models, llama-stack\n", + "Successfully installed MarkupSafe-3.0.2 Pillow-11.0.0 blobfile-3.0.0 filelock-3.16.1 fire-0.7.0 fsspec-2024.10.0 huggingface-hub-0.26.5 jinja2-3.1.4 llama-models-0.0.58 llama-stack-0.0.58 lxml-5.3.0 pycryptodomex-3.21.0 regex-2024.11.6 tiktoken-0.8.0\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: agentops in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.3.19)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.32.3)\n", + "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (5.9.8)\n", + "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (23.2)\n", + "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.5.0)\n", + "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (6.0.2)\n", + "Requirement already satisfied: opentelemetry-api<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-sdk<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (8.5.0)\n", + "Requirement already satisfied: googleapis-common-protos~=1.52 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.66.0)\n", + "Requirement already satisfied: 
opentelemetry-exporter-otlp-proto-common==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-proto==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", + "Requirement already satisfied: protobuf<6.0,>=5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-proto==1.28.2->opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (5.29.1)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api<2.0.0,>=1.22.0->agentops) (3.21.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.0.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting fastapi\n", + " Using cached fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n", + "Collecting starlette<0.42.0,>=0.40.0 (from fastapi)\n", + " Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (2.10.3)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", 
+ "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.7.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", + "Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n", + "Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n", + "Installing collected packages: starlette, fastapi\n", + "Successfully installed fastapi-0.115.6 starlette-0.41.3\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", + "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", + "Note: you may need to restart 
the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -30,18 +227,29 @@ "%pip install opentelemetry-sdk\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then import them" - ] - }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" + ] + } + ], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", @@ -77,9 +285,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -108,9 +343,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n" + ] + }, + { + "ename": "InternalServerError", + "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m 
\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/agentops/llms/llama_stack_client.py:252\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 252\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing 
required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", + "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" + ] + } + ], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -137,9 +403,95 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + 
{ + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=48206eed-d5d8-4979-ab6e-3577faff5ad4\u001b[0m\u001b[0m\n", + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f0f95a35-876f-478d-9542-fe3261ad3d18\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No available shields. Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n", + "response=.agent_generator at 0x10f44b370>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0mresponse=.agent_generator at 0x10f44a3b0>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m Sem\u001b[0m\u001b[36mif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m teams\u001b[0m\u001b[36m\")\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference Semifinals 2014 teams'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference Semifinals 2014 teams\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"This would be the last Game 7 won by a road team until the 2016 NBA Finals. Game 4 of the Heat\\u2013Nets series saw LeBron James record a Heat franchise playoff high 49 points. He eventually led the Miami Heat to their fourth consecutive Eastern Conference Finals appearance with a win in Game 5. 
With a Game 5 win over the Portland Trail Blazers, the San Antonio Spurs advanced to the Western ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Playoffs Summary | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/NBA_2014.html\", \"description\": \"Checkout the Results, Statistics, Playoff Leaders, Per Game Stats, Advanced Stats and more for the 2014 NBA playoffs on Basketball-Reference.com\", \"type\": \"search_result\"}]}\u001b[0m\n", + "\u001b[35mshield_call> No Violation\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winning\u001b[0m\u001b[33m team\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m of\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m was\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m game\u001b[0m\u001b[33m was\u001b[0m\u001b[33m played\u001b[0m\u001b[33m between\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m and\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m ultimately\u001b[0m\u001b[33m advancing\u001b[0m\u001b[33m to\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m Finals\u001b[0m\u001b[33m.\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: Could not end session - multiple sessions detected. 
You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" + ] + } + ], "source": [ "import os\n", "from llama_stack_client import LlamaStackClient\n", @@ -213,8 +565,8 @@ "\n", " print(f\"{response=}\")\n", "\n", - " # for log in EventLogger().log(response):\n", - " # log.print()\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", "\n", "agentops.start_session()\n", "\n", @@ -225,12 +577,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 28.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n", + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 19.6s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=516a6f7f-56b5-4f04-bad6-a42d76fc7f55\u001b[0m\u001b[0m\n", + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 9.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=e6a248fb-b78c-4fd4-bffe-50a0a8065bfa\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ "agentops.end_all_sessions()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -249,7 +621,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb new file mode 100644 index 000000000..64a40fe12 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb @@ -0,0 +1,398 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.58)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", + "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", + "%pip install -U agentops\n", + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" + ] + } + ], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops\n", + "\n", + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + "\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary + Agent Canary" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n", + "No available shields. 
Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n", + "response=.agent_generator at 0x1240c3990>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0mresponse=.agent_generator at 0x1240c3840>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m semif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m winners\u001b[0m\u001b[36m\")\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference semifinals 2014 winners'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference semifinals 2014 winners\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"The 2014 NBA playoffs was the postseason tournament of the National Basketball Association's 2013\\u201314 season. The tournament concluded with the Western Conference champion San Antonio Spurs defeating the two-time defending NBA champion and Eastern Conference champion Miami Heat 4 games to ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Finals | Basketball Wiki | Fandom\", \"url\": \"https://basketball.fandom.com/wiki/2014_NBA_Finals\", \"description\": \"The 2014 NBA Finals was the championship series of the 2013\\u201314 season of the National Basketball Association (NBA) and the conclusion of the season's playoffs. The Western Conference champion San Antonio Spurs defeated the two-time defending NBA champion and Eastern Conference champion Miami ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. 
Spurs\", \"type\": \"search_result\"}]}\u001b[0m\n", + "\u001b[35mshield_call> No Violation\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winners\u001b[0m\u001b[33m of\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m in\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m were\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m who\u001b[0m\u001b[33m defeated\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m \u001b[0m\u001b[33m4\u001b[0m\u001b[33m games\u001b[0m\u001b[33m to\u001b[0m\u001b[33m \u001b[0m\u001b[33m0\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 4 | \u001b[1mTools:\u001b[0m 1 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "### Inference Canary\n", + "\n", + "agentops.start_session() # AgentOps start session\n", + "\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "\n", + "### Agent Canary\n", + "\n", + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. 
Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(f\"{response=}\")\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "await agent_test()\n", + "\n", + "agentops.end_session(\"Success\") # AgentOps end session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_all_sessions()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index ab6399cbc..8095a3c82 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -1,27 +1,28 @@ import asyncio import os +from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Attachment from llama_stack_client.types.agent_create_params import AgentConfig -import agentops - -LLAMA_STACK_PORT = 5001 -INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" +load_dotenv() # import debugpy # debugpy.listen(5678) # debugpy.wait_for_client() -agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) +import agentops # type: ignore +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) +LLAMA_STACK_HOST = "0.0.0.0" +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" async def agent_test(): client = LlamaStackClient( - base_url=f"http://localhost:{LLAMA_STACK_PORT}", + base_url=f"http://{LLAMA_STACK_HOST}:{LLAMA_STACK_PORT}", ) available_shields = [shield.identifier for shield in client.shields.list()] @@ -76,12 +77,9 @@ async def agent_test(): session_id=session_id, ) - 
print("Response: ", response) - - # for log in EventLogger().log(response): - # log.print() - + for log in EventLogger().log(response): + log.print() agentops.start_session() asyncio.run(agent_test()) -agentops.end_session("Success") +agentops.end_session(end_state="Success") \ No newline at end of file From 35ce1db343e983e609c6a97c0ba70e372a56e8c0 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sun, 8 Dec 2024 17:38:25 +0530 Subject: [PATCH 65/69] clean notebook and remove commented code --- .../llama_stack_example.ipynb | 403 +----------------- .../llama_stack_example_for_ci.ipynb | 203 +-------- .../llama_stack_client_canary/agent_canary.py | 4 - 3 files changed, 18 insertions(+), 592 deletions(-) diff --git a/examples/llama_stack_client_examples/llama_stack_example.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb index 621b692c0..42297557c 100644 --- a/examples/llama_stack_client_examples/llama_stack_example.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_example.ipynb @@ -17,206 +17,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.57)\n", - "Collecting llama-stack-client\n", - " Downloading llama_stack_client-0.0.58-py3-none-any.whl.metadata (15 kB)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", - "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
(3.10)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", - "Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n", - "Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n", - "Installing collected packages: starlette, fastapi\n", - "Successfully installed fastapi-0.115.6 starlette-0.41.3\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -229,27 +32,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at 
https://app.agentops.ai/settings/projects\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" - ] - } - ], + "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", @@ -285,36 +70,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -343,40 +101,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n" - ] - }, - { - "ename": "InternalServerError", - "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/agentops/llms/llama_stack_client.py:252\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 252\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: 
\u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m 
err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", - "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -403,95 +130,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - 
{ - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=48206eed-d5d8-4979-ab6e-3577faff5ad4\u001b[0m\u001b[0m\n", - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f0f95a35-876f-478d-9542-fe3261ad3d18\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.agent_generator at 0x10f44b370>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0mresponse=.agent_generator at 0x10f44a3b0>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m Sem\u001b[0m\u001b[36mif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m teams\u001b[0m\u001b[36m\")\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[97m\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference Semifinals 2014 teams'}\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference Semifinals 2014 teams\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"This would be the last Game 7 won by a road team until the 2016 NBA Finals. Game 4 of the Heat\\u2013Nets series saw LeBron James record a Heat franchise playoff high 49 points. He eventually led the Miami Heat to their fourth consecutive Eastern Conference Finals appearance with a win in Game 5. 
With a Game 5 win over the Portland Trail Blazers, the San Antonio Spurs advanced to the Western ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Playoffs Summary | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/NBA_2014.html\", \"description\": \"Checkout the Results, Statistics, Playoff Leaders, Per Game Stats, Advanced Stats and more for the 2014 NBA playoffs on Basketball-Reference.com\", \"type\": \"search_result\"}]}\u001b[0m\n", - "\u001b[35mshield_call> No Violation\u001b[0m\n", - "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winning\u001b[0m\u001b[33m team\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m of\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m was\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m game\u001b[0m\u001b[33m was\u001b[0m\u001b[33m played\u001b[0m\u001b[33m between\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m and\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m ultimately\u001b[0m\u001b[33m advancing\u001b[0m\u001b[33m to\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m Finals\u001b[0m\u001b[33m.\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: Could not end session - multiple sessions detected. 
You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from llama_stack_client import LlamaStackClient\n", @@ -577,32 +218,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 28.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n", - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 19.6s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=516a6f7f-56b5-4f04-bad6-a42d76fc7f55\u001b[0m\u001b[0m\n", - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 9.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=e6a248fb-b78c-4fd4-bffe-50a0a8065bfa\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.end_all_sessions()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb index 64a40fe12..7249e04ea 100644 --- a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb @@ -17,151 +17,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.58)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", - "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (1.2.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack-client) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack-client) (2.18.0)\n", - "Requirement already 
satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.17.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: llama-stack in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.58)\n", - "Requirement already satisfied: blobfile in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (3.0.0)\n", - "Requirement already satisfied: fire in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.7.0)\n", - "Requirement already satisfied: httpx in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.28.0)\n", - "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.26.5)\n", - "Requirement already satisfied: llama-models>=0.0.58 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.0.58)\n", - "Requirement already satisfied: llama-stack-client>=0.0.58 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.0.58)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (3.0.48)\n", - "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (1.0.1)\n", - "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.10.3)\n", - "Requirement already satisfied: requests in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.32.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (13.9.4)\n", - "Requirement already satisfied: setuptools in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (57.4.0)\n", - "Requirement already satisfied: termcolor in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.5.0)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (3.1.4)\n", - "Requirement already satisfied: tiktoken in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (11.0.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.7.0)\n", - "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
llama-stack-client>=0.0.58->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (2.2.3)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (24.9.0)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.12.2)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (1.0.7)\n", - "Requirement already satisfied: idna in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (3.21.0)\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (2.2.3)\n", - "Requirement already satisfied: lxml>=4.9 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (5.3.0)\n", - "Requirement already satisfied: filelock>=3.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from huggingface-hub->llama-stack) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from huggingface-hub->llama-stack) (23.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests->llama-stack) (3.4.0)\n", - "Requirement already satisfied: 
markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (2.18.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.58->llama-stack) (1.2.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from jinja2->llama-models>=0.0.58->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from tiktoken->llama-models>=0.0.58->llama-stack) (2024.11.6)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.58->llama-stack) (1.17.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: agentops in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.3.19)\n", - "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.32.3)\n", - "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (5.9.8)\n", - "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (23.2)\n", - "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.5.0)\n", - "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (6.0.2)\n", - "Requirement already satisfied: opentelemetry-api<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-sdk<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0 in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (8.5.0)\n", - "Requirement already satisfied: googleapis-common-protos~=1.52 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.66.0)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-proto==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", - "Requirement already satisfied: protobuf<6.0,>=5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-proto==1.28.2->opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (5.29.1)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (4.12.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api<2.0.0,>=1.22.0->agentops) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.0.1)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: fastapi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.115.6)\n", - "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
fastapi) (0.41.3)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (2.10.3)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.7.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.2.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -174,17 +32,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" - ] - } - ], + "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", @@ -220,43 +70,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n", - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.agent_generator at 0x1240c3990>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0mresponse=.agent_generator at 0x1240c3840>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m semif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m winners\u001b[0m\u001b[36m\")\u001b[0m\u001b[97m\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference semifinals 2014 winners'}\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference semifinals 2014 winners\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"The 2014 NBA playoffs was the postseason tournament of the National Basketball Association's 2013\\u201314 season. 
The tournament concluded with the Western Conference champion San Antonio Spurs defeating the two-time defending NBA champion and Eastern Conference champion Miami Heat 4 games to ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Finals | Basketball Wiki | Fandom\", \"url\": \"https://basketball.fandom.com/wiki/2014_NBA_Finals\", \"description\": \"The 2014 NBA Finals was the championship series of the 2013\\u201314 season of the National Basketball Association (NBA) and the conclusion of the season's playoffs. The Western Conference champion San Antonio Spurs defeated the two-time defending NBA champion and Eastern Conference champion Miami ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs\", \"type\": \"search_result\"}]}\u001b[0m\n", - "\u001b[35mshield_call> No Violation\u001b[0m\n", - "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winners\u001b[0m\u001b[33m of\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m in\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m were\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m who\u001b[0m\u001b[33m defeated\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m \u001b[0m\u001b[33m4\u001b[0m\u001b[33m games\u001b[0m\u001b[33m to\u001b[0m\u001b[33m \u001b[0m\u001b[33m0\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 4 | \u001b[1mTools:\u001b[0m 1 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "### Inference Canary\n", "\n", @@ -365,13 +181,6 @@ "source": [ "agentops.end_all_sessions()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 8095a3c82..2f5682204 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -9,10 +9,6 @@ load_dotenv() -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() - import agentops # type: ignore agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) From e3d75d7eb5cd7774d2638a5e3dd02b451adfe769 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:47:02 +0530 Subject: [PATCH 66/69] deleting llama-stack test --- tests/llama_stack/test_llama_stack.py | 38 
--------------------------- 1 file changed, 38 deletions(-) delete mode 100644 tests/llama_stack/test_llama_stack.py diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py deleted file mode 100644 index 4e5868de0..000000000 --- a/tests/llama_stack/test_llama_stack.py +++ /dev/null @@ -1,38 +0,0 @@ -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import UserMessage -from llama_stack_client.lib.inference.event_logger import EventLogger -from unittest.mock import MagicMock - - -class TestLlamaStack: - def setup_method(self): - self.client = LlamaStackClient() - self.client.inference = MagicMock() - self.client.inference.chat_completion = MagicMock( - return_value=[ - { - "choices": [ - { - "message": { - "content": "Moonlight whispers softly", - "role": "assistant", - } - } - ] - } - ] - ) - - def test_llama_stack_inference(self): - self.client.inference.chat_completion.assert_not_called() - self.client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-1B-Instruct", - stream=False, - ) - self.client.inference.chat_completion.assert_called_once() From 1777fedf44323419ff09dcc270fc7f06fd8a0080 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:47:30 +0530 Subject: [PATCH 67/69] add llama stack to examples --- docs/v1/examples/examples.mdx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/v1/examples/examples.mdx b/docs/v1/examples/examples.mdx index df6651884..c148e6728 100644 --- a/docs/v1/examples/examples.mdx +++ b/docs/v1/examples/examples.mdx @@ -42,6 +42,9 @@ mode: "wide" Jupyter Notebook with a sample LangChain integration + + Create an agent to search the web using Brave Search and find the winner of NBA western conference semifinals 2014 + Unified interface for multiple LLM providers From 637df3ff2ccbdcca932367d2bf56a6ecfc27145b Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:49:55 +0530 Subject: [PATCH 68/69] ruff --- agentops/llms/llama_stack_client.py | 2 ++ .../llama_stack_client_canary/agent_canary.py | 7 +++++-- tests/test_host_env.py | 14 ++------------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index be7815c3f..8379a6fef 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -195,6 +195,7 @@ def generator(): return generator() elif inspect.isasyncgen(response): + async def agent_generator(): async for chunk in response: handle_stream_agent(chunk) @@ -202,6 +203,7 @@ async def agent_generator(): return agent_generator() elif inspect.isgenerator(response): + def agent_generator(): for chunk in response: handle_stream_agent(chunk) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 2f5682204..2ba2f1b52 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -9,13 +9,15 @@ load_dotenv() -import agentops # type: ignore +import agentops # type: ignore + agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) LLAMA_STACK_HOST = "0.0.0.0" LLAMA_STACK_PORT = 5001 INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" + 
async def agent_test(): client = LlamaStackClient( base_url=f"http://{LLAMA_STACK_HOST}:{LLAMA_STACK_PORT}", @@ -76,6 +78,7 @@ async def agent_test(): for log in EventLogger().log(response): log.print() + agentops.start_session() asyncio.run(agent_test()) -agentops.end_session(end_state="Success") \ No newline at end of file +agentops.end_session(end_state="Success") diff --git a/tests/test_host_env.py b/tests/test_host_env.py index e6194d3ac..c22796f3f 100644 --- a/tests/test_host_env.py +++ b/tests/test_host_env.py @@ -7,18 +7,8 @@ def mock_partitions(): return [ - sdiskpart( - device="/dev/sda1", - mountpoint="/", - fstype="ext4", - opts="rw,relatime" - ), - sdiskpart( - device="z:\\", - mountpoint="z:\\", - fstype="ntfs", - opts="rw,relatime" - ), + sdiskpart(device="/dev/sda1", mountpoint="/", fstype="ext4", opts="rw,relatime"), + sdiskpart(device="z:\\", mountpoint="z:\\", fstype="ntfs", opts="rw,relatime"), ] From f9468811f00116e8935eac3df6863137581180b5 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:51:58 +0530 Subject: [PATCH 69/69] fix import --- .../providers/llama_stack_client_canary/agent_canary.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 2ba2f1b52..1060627db 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -1,3 +1,4 @@ +import agentops import asyncio import os from dotenv import load_dotenv @@ -9,8 +10,6 @@ load_dotenv() -import agentops # type: ignore - agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) LLAMA_STACK_HOST = "0.0.0.0"
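For reference, the following is a minimal sketch (not taken from any commit above) of the workflow this patch series instruments. It assumes llama-stack-client>=0.0.53 and agentops are installed, a Llama Stack server is already listening on 0.0.0.0:5001 with meta-llama/Llama-3.2-1B-Instruct served (the same host, port, and model the canary uses), and that AGENTOPS_API_KEY is exported; the response object is printed as-is rather than parsed, since its exact shape is not asserted here.

import os

import agentops
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage

# Start the session manually so it cleanly wraps the single inference call.
agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False)
agentops.start_session()

# Point the client at the locally running Llama Stack server (assumed address).
client = LlamaStackClient(base_url="http://0.0.0.0:5001")

# The provider added in this patch series instruments this inference call.
response = client.inference.chat_completion(
    messages=[
        UserMessage(content="hello world, write me a 3 word poem about the moon", role="user"),
    ],
    model_id="meta-llama/Llama-3.2-1B-Instruct",
    stream=False,
)
print(response)

agentops.end_session(end_state="Success")

Streaming and async-generator responses are handled by the same provider (see the generator and agent_generator branches in the ruff commit above), so the non-streaming call shown here is only the simplest path.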