From 52c132ec13b7206d5ee10a52ee25f9a8ced628d1 Mon Sep 17 00:00:00 2001 From: tad dy Date: Fri, 22 Nov 2024 14:47:01 -0500 Subject: [PATCH 01/69] llama_stack_client LLM provider --- agentops/llms/__init__.py | 15 ++ agentops/llms/llama_stack_client.py | 142 +++++++++++++++ .../llama_stack_client_examples/README.md | 31 ++++ .../llama_stack_client_examples.ipynb | 163 ++++++++++++++++++ .../providers/llama_stack_client_canary.py | 53 ++++++ 5 files changed, 404 insertions(+) create mode 100644 agentops/llms/llama_stack_client.py create mode 100644 examples/llama_stack_client_examples/README.md create mode 100644 examples/llama_stack_client_examples/llama_stack_client_examples.ipynb create mode 100644 tests/core_manual_tests/providers/llama_stack_client_canary.py diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py index a5852d8cd..3c29167c6 100644 --- a/agentops/llms/__init__.py +++ b/agentops/llms/__init__.py @@ -5,6 +5,8 @@ from packaging.version import Version, parse +from agentops.llms.llama_stack_client import LlamaStackClientProvider + from ..log_config import logger from .cohere import CohereProvider @@ -35,6 +37,9 @@ class LlmTracker: "5.4.0": ("chat", "chat_stream"), }, "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, + "llama_stack_client": { + "0.0.53": ("resources.InferenceResource.chat_completion"), + }, "groq": { "0.9.0": ("Client.chat", "AsyncClient.chat"), }, @@ -151,6 +156,15 @@ def override_api(self): else: logger.warning(f"Only AI21>=2.0.0 supported. v{module_version} found.") + if api == "llama_stack_client": + module_version = version(api) + + if Version(module_version) >= parse("0.0.53"): + provider = LlamaStackClientProvider(self.client) + provider.override() + else: + logger.warning(f"Only LlamaStackClient>=0.0.53 supported. 
v{module_version} found.") + def stop_instrumenting(self): OpenAiProvider(self.client).undo_override() GroqProvider(self.client).undo_override() @@ -160,3 +174,4 @@ def stop_instrumenting(self): AnthropicProvider(self.client).undo_override() MistralProvider(self.client).undo_override() AI21Provider(self.client).undo_override() + LlamaStackClientProvider(self.client).undo_override() diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py new file mode 100644 index 000000000..8218fb54b --- /dev/null +++ b/agentops/llms/llama_stack_client.py @@ -0,0 +1,142 @@ +import inspect +import pprint +import sys +from typing import Optional + +from ..event import LLMEvent, ErrorEvent +from ..session import Session +from ..log_config import logger +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from .instrumented_provider import InstrumentedProvider + + +class LlamaStackClientProvider(InstrumentedProvider): + original_complete = None + original_complete_async = None + original_stream = None + original_stream_async = None + + def __init__(self, client): + super().__init__(client) + self._provider_name = "LlamaStack" + + def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict: + """Handle responses for LlamaStack""" + from llama_stack_client import LlamaStackClient + + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion + if llm_event.returns is None: + llm_event.returns = chunk.event + + try: + accumulated_delta = llm_event.returns.delta + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.event + + if choice.delta: + llm_event.returns.delta += choice.delta + + if choice.event_type == "complete": + + llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.completion = accumulated_delta + llm_event.prompt_tokens = None + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + # if the response is a generator, decorate the generator + if kwargs["stream"] == True: + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + elif inspect.isasyncgen(response): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + try: + llm_event.returns = response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.prompt_tokens = None + llm_event.completion = response.completion_message.content + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, llm_event) + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_complete(self): + from llama_stack_client.resources import InferenceResource + + global original_complete + original_complete = InferenceResource.chat_completion + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = original_complete(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + InferenceResource.chat_completion = patched_function + + def override(self): + self._override_complete() + # self._override_complete_async() + # self._override_stream() + # self._override_stream_async() + + def undo_override(self): + if ( + self.original_complete is not None + ): + + from llama_stack_client.resources import InferenceResource + InferenceResource.chat_completion = self.original_complete diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md new file mode 100644 index 000000000..ef6d6a546 --- /dev/null +++ b/examples/llama_stack_client_examples/README.md @@ -0,0 +1,31 @@ +## How to run Llama Stack server + +export LLAMA_STACK_PORT=5001 +export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" + +docker run \ + -it \ + -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ + -v ~/.llama:/root/.llama \ + -v ./run.yaml:/root/my-run.yaml \ + llamastack/distribution-ollama \ + --yaml-config /root/my-run.yaml \ + --port $LLAMA_STACK_PORT \ + --env INFERENCE_MODEL=$INFERENCE_MODEL \ + --env OLLAMA_URL=http://host.docker.internal:11434 + +## Example Llama Stack server config + +https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml + +## Reference documentation + +- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#setting-up-ollama-server +- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#running-llama-stack + +- https://github.com/meta-llama/llama-stack-client-python +- https://github.com/meta-llama/llama-stack +- download https://ollama.com/ +- 
https://www.llama.com/docs/getting_the_models/meta/ + +## \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb new file mode 100644 index 000000000..c5d9dceb9 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -0,0 +1,163 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U agentops" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then import them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Completion Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " stream=False\n", + ")\n", + "\n", + "print(f\"> Response: {response}\")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Completion with Streaming Example" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "agentops.end_session(\"Success\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ops", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": 
"ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary.py new file mode 100644 index 000000000..f61ac3473 --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary.py @@ -0,0 +1,53 @@ +import asyncio + +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(default_tags=["llama-stack-client-provider-test"]) + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + base_url=f"{full_host}", +) + +response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=False +) + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=True + ) + + async for log in EventLogger().log(response): + log.print() + + +async def main(): + await stream_test() + +agentops.end_session(end_state="Success") From ec8445db6bc92b5478cbc0807527f897bfece7ad Mon Sep 17 00:00:00 2001 From: Teo Date: Fri, 22 Nov 2024 23:16:38 -0600 Subject: [PATCH 02/69] ruff Signed-off-by: Teo --- agentops/llms/llama_stack_client.py | 17 +++++----- .../providers/llama_stack_client_canary.py | 32 ++++++++++--------- 2 files changed, 25 insertions(+), 24 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 8218fb54b..b9ec8bd7b 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -23,7 +23,7 @@ def __init__(self, client): def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict: """Handle responses for LlamaStack""" from llama_stack_client import LlamaStackClient - + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: llm_event.session_id = session.session_id @@ -47,8 +47,9 @@ def handle_stream_chunk(chunk: dict): llm_event.returns.delta += choice.delta if choice.event_type == "complete": - - llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] llm_event.agent_id = check_call_stack_for_agent_id() llm_event.completion = accumulated_delta llm_event.prompt_tokens = None @@ -88,9 +89,9 @@ async def async_generator(): try: llm_event.returns = response - llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = kwargs["model_id"] - llm_event.prompt = [{ "content": message.content, "role": message.role } for message in kwargs["messages"]] + llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] llm_event.prompt_tokens = 
None llm_event.completion = response.completion_message.content llm_event.completion_tokens = None @@ -134,9 +135,7 @@ def override(self): # self._override_stream_async() def undo_override(self): - if ( - self.original_complete is not None - ): - + if self.original_complete is not None: from llama_stack_client.resources import InferenceResource + InferenceResource.chat_completion = self.original_complete diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary.py index f61ac3473..0955f9ccc 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary.py @@ -11,8 +11,8 @@ agentops.init(default_tags=["llama-stack-client-provider-test"]) -host = "0.0.0.0" # LLAMA_STACK_HOST -port = 5001 # LLAMA_STACK_PORT +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT full_host = f"http://{host}:{port}" @@ -28,26 +28,28 @@ ), ], model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=False + stream=False, ) + async def stream_test(): - response = client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=True - ) + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=True, + ) - async for log in EventLogger().log(response): - log.print() + async for log in EventLogger().log(response): + log.print() async def main(): await stream_test() + agentops.end_session(end_state="Success") From 2fac7a09e75781e3c08472690e5fd6f83eab6ec7 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 05:52:08 -0500 Subject: [PATCH 03/69] refining and documenting the llama stack integration support & process for future reference --- .../llama_stack_client_examples/README.md | 59 ++++++-- .../llama_stack_client_examples.ipynb | 141 ++++++++++++++++-- 2 files changed, 176 insertions(+), 24 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index ef6d6a546..c24bf1ac0 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -1,31 +1,70 @@ -## How to run Llama Stack server +# TLDR +How to set up a Llama Stack server for supporting the `llama_stack_client_example.ipynb` examples + +## Disclaimer + +As of 11/2024, Llama Stack is new and is subject to breaking changes. + +Here are Llama Stack's docs: https://llama-stack.readthedocs.io/en/latest/ + +## High-level steps + +https://llama-stack.readthedocs.io/en/latest/getting_started/index.html# + +1. Download, install, & start Ollama +2. Start the Llama Stack Server +3. Call the Llama Stack Server with a Llama Stack Client + +### 1 - Download, install, & start Ollama + +https://ollama.com/ + +Ollama has an easy-to-use installer available for macOS, Linux, and Windows. + +```sh +export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" +ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m +``` + +### 2 - Start the Llama Stack server + +You need to configure the Llama Stack server with a yaml config ie: peep the `llama-stack-server-config.yaml` file. 
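The part of that config that actually wires things together is the Ollama inference provider and the model registration; both read the environment variables passed to the container in the `docker run` command below. An excerpt from `llama-stack-server-config.yaml`:

```yaml
providers:
  inference:
  - provider_id: ollama
    provider_type: remote::ollama
    config:
      url: ${env.OLLAMA_URL:http://localhost:11434}
models:
- metadata: {}
  model_id: ${env.INFERENCE_MODEL}
  provider_id: ollama
```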
FYI, found this config here: `https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml` + +```sh export LLAMA_STACK_PORT=5001 export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" - docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v ./run.yaml:/root/my-run.yaml \ + -v ./llama-stack-server-config.yaml:/root/my-run.yaml \ llamastack/distribution-ollama \ --yaml-config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ --env INFERENCE_MODEL=$INFERENCE_MODEL \ --env OLLAMA_URL=http://host.docker.internal:11434 +``` -## Example Llama Stack server config +### 3 - Call the Llama Stack Server with a Llama Stack Client -https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml +ie: Check out the examples in the `llama_stack_client_examples.ipynb` file -## Reference documentation +## Common Gotchas -- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#setting-up-ollama-server -- https://llama-stack.readthedocs.io/en/latest/getting_started/distributions/self_hosted_distro/ollama.html#running-llama-stack +1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:3b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` +## Useful ollama commands + +- `ollama list` +- `ollama help` +- `ollama ps` + +## Reference links used during development + +- https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml +- https://llama-stack.readthedocs.io - https://github.com/meta-llama/llama-stack-client-python - https://github.com/meta-llama/llama-stack - download https://ollama.com/ - https://www.llama.com/docs/getting_the_models/meta/ - -## \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index c5d9dceb9..d2343e7a2 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,12 +17,76 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement 
already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", + "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", + "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", + "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", + "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting python-dotenv\n", + " Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", + "Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", + "Installing collected packages: python-dotenv\n", + "Successfully installed python-dotenv-1.0.1\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade 
pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -U llama-stack-client\n", - "%pip install -U agentops" + "%pip install -U agentops\n", + "%pip install -U python-dotenv" ] }, { @@ -34,7 +98,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -55,7 +119,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, "outputs": [], "source": [ @@ -65,9 +129,17 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=31b39ff6-59b5-417d-a59d-7646c86de968\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", "\n", @@ -90,10 +162,19 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 6, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='\"Silver glow descends\"', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + ] + } + ], "source": [ + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", @@ -105,7 +186,8 @@ " stream=False\n", ")\n", "\n", - "print(f\"> Response: {response}\")" + "print(f\"> Response: {response}\")\n", + "agentops.end_session(\"Success\")" ] }, { @@ -117,10 +199,34 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[97m\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.0s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ + "agentops.start_session()\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", @@ -137,11 +243,18 @@ "\n", "agentops.end_session(\"Success\")" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { "kernelspec": { - "display_name": "ops", + "display_name": "venv", "language": "python", "name": "python3" }, @@ -155,7 +268,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.9.19" + "version": "3.12.2" } }, "nbformat": 4, From 4299bcbd7dd06dd553296f8ca1c86965e693e896 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:02:40 -0500 Subject: [PATCH 04/69] 
fixing error in the core_manual_test for Llama Stack --- .../providers/llama_stack_client_canary.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary.py index 0955f9ccc..75bb4ac00 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary.py @@ -48,8 +48,10 @@ async def stream_test(): log.print() -async def main(): - await stream_test() +def main(): + asyncio.run(stream_test()) + +main() agentops.end_session(end_state="Success") From 5fb0b36ce49008951499d59edf3a73aac4949ea0 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:06:31 -0500 Subject: [PATCH 05/69] removing unnecessary elif branch in llama_stack_client.py llm provider --- agentops/llms/llama_stack_client.py | 9 --------- 1 file changed, 9 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index b9ec8bd7b..b9d25254c 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -78,15 +78,6 @@ def generator(): return generator() - elif inspect.isasyncgen(response): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - try: llm_event.returns = response llm_event.agent_id = check_call_stack_for_agent_id() From cfa18996cd55001e7dd01d17d8b659768a2d1295 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:11:11 -0500 Subject: [PATCH 06/69] updating llama stack examples & documentation --- .../llama-stack-server-config.yaml | 54 +++++++++++++++++++ .../llama_stack_client_examples.ipynb | 8 +-- 2 files changed, 58 insertions(+), 4 deletions(-) create mode 100644 examples/llama_stack_client_examples/llama-stack-server-config.yaml diff --git a/examples/llama_stack_client_examples/llama-stack-server-config.yaml b/examples/llama_stack_client_examples/llama-stack-server-config.yaml new file mode 100644 index 000000000..32137fd67 --- /dev/null +++ b/examples/llama_stack_client_examples/llama-stack-server-config.yaml @@ -0,0 +1,54 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb 
b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index d2343e7a2..33d89f44e 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -162,19 +162,19 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, "outputs": [ { - "name": "stdout", + "name": "stderr", "output_type": "stream", "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='\"Silver glow descends\"', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" ] } ], "source": [ - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", + "agentops.start_session()\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", From af70799d52fd8c0589baafe6bc7e8fa3e07611fc Mon Sep 17 00:00:00 2001 From: tad dy Date: Sun, 24 Nov 2024 06:12:38 -0500 Subject: [PATCH 07/69] updating llama_stack_client_examples.ipynb --- .../llama_stack_client_examples.ipynb | 27 ++++++++++++++----- 1 file changed, 21 insertions(+), 6 deletions(-) diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 33d89f44e..3e2b6b059 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -162,7 +162,7 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 10, "metadata": {}, "outputs": [ { @@ -171,6 +171,21 @@ "text": [ "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver light descends', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 3.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" + ] } ], "source": [ @@ -199,29 +214,29 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" ] }, { 
"name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.0s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b25689b3-a380-41b5-8856-a4f412c40477\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" ] } ], From 0b56e40564e7fd8d4cf01fcccf16bd9604baafce Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 25 Nov 2024 13:21:00 -0500 Subject: [PATCH 08/69] saving tweaks to Llama Stack client examples and related README.md after live demo to Maintainers team --- .../llama_stack_client_examples/README.md | 2 +- .../llama_stack_client_examples.ipynb | 48 +++++++------------ 2 files changed, 19 insertions(+), 31 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index c24bf1ac0..176b8c8a3 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -38,7 +38,7 @@ docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ -v ~/.llama:/root/.llama \ - -v ./llama-stack-server-config.yaml:/root/my-run.yaml \ + -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ llamastack/distribution-ollama \ --yaml-config /root/my-run.yaml \ --port $LLAMA_STACK_PORT \ diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 3e2b6b059..1b4bd3f7f 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -71,11 +71,7 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Collecting python-dotenv\n", - " Using cached python_dotenv-1.0.1-py3-none-any.whl.metadata (23 kB)\n", - "Using cached python_dotenv-1.0.1-py3-none-any.whl (19 kB)\n", - "Installing collected packages: python-dotenv\n", - "Successfully installed python-dotenv-1.0.1\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", 
@@ -98,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ @@ -119,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 2, "metadata": {}, "outputs": [], "source": [ @@ -129,19 +125,11 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 3, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=31b39ff6-59b5-417d-a59d-7646c86de968\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"])\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", "port = 5001 # LLAMA_STACK_PORT\n", @@ -162,29 +150,29 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver light descends', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver glowing crescent', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 3.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=b59455dd-1b28-411b-a954-4d2728a0ae88\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" ] } ], @@ -193,7 +181,7 @@ "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", - " content=\"hello world, write me a 3 word poem about the moon\",\n", + " content=\"write me a 3 word poem about the moon\",\n", " role=\"user\",\n", " ),\n", " ],\n", @@ -214,29 +202,29 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> 
\u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=bd78dee0-6247-42ae-8f4c-787f5f454a1d\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.2s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" ] } ], From f855873754a6b823c88c40128e061d6fbe9af7b6 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 25 Nov 2024 23:27:02 -0500 Subject: [PATCH 09/69] saving v1 of patching of Llama Stack Agent.create_turn method --- agentops/llms/llama_stack_client.py | 219 ++++++++++++------ .../llama_stack_client_examples.ipynb | 38 +-- .../llama_stack_client_canary/agent_canary.py | 97 ++++++++ .../inference_canary.py} | 35 +-- 4 files changed, 284 insertions(+), 105 deletions(-) create mode 100644 tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py rename tests/core_manual_tests/providers/{llama_stack_client_canary.py => llama_stack_client_canary/inference_canary.py} (61%) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index b9d25254c..d0554c170 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -1,7 +1,7 @@ import inspect import pprint import sys -from typing import Optional +from typing import Dict, Optional from ..event import LLMEvent, ErrorEvent from ..session import Session @@ -9,86 +9,135 @@ from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id from .instrumented_provider import InstrumentedProvider - class LlamaStackClientProvider(InstrumentedProvider): original_complete = None - original_complete_async = None - original_stream = None - original_stream_async = None + original_create_turn = None + def __init__(self, client): super().__init__(client) self._provider_name = "LlamaStack" - def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None) -> dict: + def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: """Handle responses for LlamaStack""" - from llama_stack_client import LlamaStackClient - - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - llm_event.session_id = session.session_id - - def handle_stream_chunk(chunk: dict): - # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - if llm_event.returns is None: - llm_event.returns = chunk.event - - try: - accumulated_delta = llm_event.returns.delta - llm_event.agent_id = 
check_call_stack_for_agent_id() - llm_event.model = kwargs["model_id"] - llm_event.prompt = kwargs["messages"] - - # NOTE: We assume for completion only choices[0] is relevant - choice = chunk.event - - if choice.delta: - llm_event.returns.delta += choice.delta - - if choice.event_type == "complete": - llm_event.prompt = [ - {"content": message.content, "role": message.role} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.completion = accumulated_delta - llm_event.prompt_tokens = None - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) - - except Exception as e: - self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) - - kwargs_str = pprint.pformat(kwargs) - chunk = pprint.pformat(chunk) - logger.warning( - f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" - f"chunk:\n {chunk}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - # if the response is a generator, decorate the generator - if kwargs["stream"] == True: - - def generator(): - for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return generator() - try: - llm_event.returns = response - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = kwargs["model_id"] - llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] - llm_event.prompt_tokens = None - llm_event.completion = response.completion_message.content - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - - self._safe_record(session, llm_event) + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion + if llm_event.returns is None: + llm_event.returns = chunk.event + + try: + accumulated_delta = llm_event.returns.delta + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = kwargs["model_id"] + llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.event + + if choice.delta: + llm_event.returns.delta += choice.delta + + if choice.event_type == "complete": + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.completion = accumulated_delta + llm_event.prompt_tokens = None + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + def handle_stream_agent(chunk: dict): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion + + if llm_event.returns is None: + llm_event.returns = chunk.event + + try: + if chunk.event.payload.event_type == "step_start": + pass + elif chunk.event.payload.event_type == "turn_start": + pass + elif chunk.event.payload.event_type == "step_progress": + + if (chunk.event.payload.step_type == "inference"): + delta = chunk.event.payload.text_delta_model_response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = "Llama Stack" + llm_event.prompt = kwargs["messages"] + + if llm_event.completion: + llm_event.completion += delta + else: + llm_event.completion = delta + + elif chunk.event.payload.event_type == "step_complete": + pass + elif chunk.event.payload.event_type == "turn_complete": + llm_event.prompt = [ + {"content": message['content'], "role": message['role']} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + except Exception as e: + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + if kwargs.get("stream", False): + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + return generator() + elif inspect.isasyncgen(response): + async def async_generator(): + async for chunk in response: + handle_stream_agent(chunk) + yield chunk + + return async_generator() + else: + llm_event.returns = response + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata["model_id"] + llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] + llm_event.prompt_tokens = None + llm_event.completion = response.completion_message.content + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, llm_event) except Exception as e: self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) kwargs_str = pprint.pformat(kwargs) @@ -119,14 +168,36 @@ def patched_function(*args, **kwargs): # Override the original method with the patched one InferenceResource.chat_completion = patched_function + def _override_create_turn(self): + from llama_stack_client.lib.agents.agent import Agent + + global original_create_turn + original_create_turn = Agent.create_turn + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = original_create_turn(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) + + # Override the original method with the patched one + Agent.create_turn = 
patched_function + + def override(self): self._override_complete() - # self._override_complete_async() + self._override_create_turn() # self._override_stream() # self._override_stream_async() def undo_override(self): if self.original_complete is not None: from llama_stack_client.resources import InferenceResource - InferenceResource.chat_completion = self.original_complete + + if self.original_create_turn is not None: + from llama_stack_client.lib.agents.agent import Agent + Agent.create_turn = self.original_create_turn diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 1b4bd3f7f..b4e2d96fa 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,7 +17,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 6, "metadata": {}, "outputs": [ { @@ -94,7 +94,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -115,7 +115,7 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -125,9 +125,17 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: AgentOps has already been initialized. If you are trying to start a session, call agentops.start_session() instead.\n" + ] + } + ], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", @@ -150,29 +158,29 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 10, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Silver glowing crescent', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Lunar gentle glow', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=963e5fa7-078f-4119-ab1e-de8cde3cbe77\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 10.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" ] } ], @@ -202,29 +210,29 @@ }, { "cell_type": "code", - "execution_count": 5, + 
"execution_count": 11, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 4.2s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9113bc40-0057-4ed1-849a-511eb15ee873\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.4s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" ] } ], diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py new file mode 100644 index 000000000..cd4d169fa --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -0,0 +1,97 @@ +import asyncio +import os + +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types import Attachment +from llama_stack_client.types.agent_create_params import AgentConfig + +import os +import fire +from llama_stack_client import LlamaStackClient +from llama_stack_client.lib.agents.agent import Agent +from llama_stack_client.lib.agents.event_logger import EventLogger +from llama_stack_client.types.agent_create_params import AgentConfig + +import agentops + +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + + +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" + +async def agent_test(): + client = LlamaStackClient( + base_url=f"http://localhost:{LLAMA_STACK_PORT}", + ) + + available_shields = [shield.identifier for shield in client.shields.list()] + if not available_shields: + print("No available shields. Disable safety.") + else: + print(f"Available shields found: {available_shields}") + available_models = [model.identifier for model in client.models.list()] + if not available_models: + raise ValueError("No available models") + else: + selected_model = available_models[0] + print(f"Using model: {selected_model}") + + agent_config = AgentConfig( + model=selected_model, + instructions="You are a helpful assistant. 
Just say hello as a greeting.", + sampling_params={ + "strategy": "greedy", + "temperature": 1.0, + "top_p": 0.9, + }, + tools=[ + { + "type": "brave_search", + "engine": "brave", + "api_key": os.getenv("BRAVE_SEARCH_API_KEY"), + } + ], + tool_choice="auto", + tool_prompt_format="json", + input_shields=available_shields if available_shields else [], + output_shields=available_shields if available_shields else [], + enable_session_persistence=False, + ) + agent = Agent(client, agent_config) + user_prompts = [ + "Hello", + "Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools", + ] + + session_id = agent.create_session("test-session") + + for prompt in user_prompts: + response = agent.create_turn( + messages=[ + { + "role": "user", + "content": prompt, + } + ], + session_id=session_id, + ) + + print("Response: ", response) + + async for log in EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + asyncio.run(agent_test()) + agentops.end_session("Success") + +main() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py similarity index 61% rename from tests/core_manual_tests/providers/llama_stack_client_canary.py rename to tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py index 75bb4ac00..dc4382f81 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py @@ -9,7 +9,11 @@ load_dotenv() -agentops.init(default_tags=["llama-stack-client-provider-test"]) +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT @@ -20,18 +24,6 @@ base_url=f"{full_host}", ) -response = client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=False, -) - - async def stream_test(): response = client.inference.chat_completion( messages=[ @@ -49,9 +41,20 @@ async def stream_test(): def main(): - asyncio.run(stream_test()) + agentops.start_session() -main() + client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-3B-Instruct", + stream=False, + ) + asyncio.run(stream_test()) + agentops.end_session(end_state="Success") -agentops.end_session(end_state="Success") +main() From 6bf54e5790ad774a26c960b282f7913fbfc2de64 Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 01:57:02 -0500 Subject: [PATCH 10/69] save progress to testing Llama Stack Agent class and Inference class --- agentops/llms/llama_stack_client.py | 79 ++++++++++++++++++----------- 1 file changed, 49 insertions(+), 30 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index d0554c170..8dcddd60e 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -3,7 +3,7 @@ import sys from typing import Dict, Optional -from ..event import LLMEvent, ErrorEvent +from ..event import LLMEvent, ErrorEvent, ToolEvent from ..session import Session from ..log_config import logger from 
agentops.helpers import get_ISO_time, check_call_stack_for_agent_id @@ -21,35 +21,38 @@ def __init__(self, client): def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: """Handle responses for LlamaStack""" try: - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - llm_event.session_id = session.session_id + accum_delta = None def handle_stream_chunk(chunk: dict): + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion if llm_event.returns is None: llm_event.returns = chunk.event try: - accumulated_delta = llm_event.returns.delta + nonlocal accum_delta llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = kwargs["model_id"] llm_event.prompt = kwargs["messages"] # NOTE: We assume for completion only choices[0] is relevant - choice = chunk.event - - if choice.delta: - llm_event.returns.delta += choice.delta + # chunk.event - if choice.event_type == "complete": + if chunk.event.event_type == "start": + accum_delta = chunk.event.delta + elif chunk.event.event_type == "progress": + accum_delta += chunk.event.delta + elif chunk.event.event_type == "complete": llm_event.prompt = [ {"content": message.content, "role": message.role} for message in kwargs["messages"] ] llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.completion = accumulated_delta llm_event.prompt_tokens = None + llm_event.completion = accum_delta llm_event.completion_tokens = None llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) @@ -68,7 +71,11 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + + if session is not None: + llm_event.session_id = session.session_id + if llm_event.returns is None: llm_event.returns = chunk.event @@ -79,29 +86,39 @@ def handle_stream_agent(chunk: dict): pass elif chunk.event.payload.event_type == "step_progress": - if (chunk.event.payload.step_type == "inference"): + if (chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): + nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = "Llama Stack" llm_event.prompt = kwargs["messages"] - if llm_event.completion: - llm_event.completion += delta + if accum_delta: + accum_delta += delta else: - llm_event.completion = delta - + accum_delta = delta + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "started"): + pass + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + pass + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == 
"success"): + pass + elif chunk.event.payload.event_type == "step_complete": - pass + print("Step complete") + if (chunk.event.payload.step_type == "inference"): + llm_event.prompt = [ + {"content": message['content'], "role": message['role']} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion = accum_delta + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) elif chunk.event.payload.event_type == "turn_complete": - llm_event.prompt = [ - {"content": message['content'], "role": message['role']} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata.get("model_id", "Unable to identify model") - llm_event.prompt_tokens = None - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) + pass except Exception as e: self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) @@ -128,9 +145,13 @@ async def async_generator(): return async_generator() else: + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + llm_event.session_id = session.session_id + llm_event.returns = response llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata["model_id"] + llm_event.model = kwargs["model_id"] llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] llm_event.prompt_tokens = None llm_event.completion = response.completion_message.content @@ -190,8 +211,6 @@ def patched_function(*args, **kwargs): def override(self): self._override_complete() self._override_create_turn() - # self._override_stream() - # self._override_stream_async() def undo_override(self): if self.original_complete is not None: From 3dc0d2fcb6b22a07a53b578e6bb92f33b42bad34 Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 02:01:17 -0500 Subject: [PATCH 11/69] minor edits --- agentops/llms/llama_stack_client.py | 1 - .../providers/llama_stack_client_canary/agent_canary.py | 6 +++--- 2 files changed, 3 insertions(+), 4 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 8dcddd60e..2a1485698 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -105,7 +105,6 @@ def handle_stream_agent(chunk: dict): pass elif chunk.event.payload.event_type == "step_complete": - print("Step complete") if (chunk.event.payload.step_type == "inference"): llm_event.prompt = [ {"content": message['content'], "role": message['role']} for message in kwargs["messages"] diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index cd4d169fa..9921c4d69 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -16,9 +16,9 @@ import agentops -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) From b815ef3740debb9d661a0d1f2e2cc7b8069801c4 Mon Sep 17 00:00:00 
2001 From: tad dy Date: Tue, 26 Nov 2024 11:58:39 -0500 Subject: [PATCH 12/69] removing unneeded code --- agentops/llms/llama_stack_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 2a1485698..a5d06f90d 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -90,7 +90,6 @@ def handle_stream_agent(chunk: dict): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = "Llama Stack" llm_event.prompt = kwargs["messages"] if accum_delta: From 888b6351b2ff5f5356846818aaed55d0e3328ccd Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 12:02:08 -0500 Subject: [PATCH 13/69] format line --- agentops/llms/llama_stack_client.py | 1 - 1 file changed, 1 deletion(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index a5d06f90d..5c060da60 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -140,7 +140,6 @@ async def async_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk - return async_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) From 1c7c1deecf673db4d16a9487a8048e06b65171fd Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 26 Nov 2024 14:00:03 -0500 Subject: [PATCH 14/69] adding support for monitoring tools --- agentops/llms/llama_stack_client.py | 28 ++++++++++++++++++++++------ 1 file changed, 22 insertions(+), 6 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 5c060da60..8fbd2b29d 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -22,6 +22,7 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se """Handle responses for LlamaStack""" try: accum_delta = None + accum_tool_delta = None def handle_stream_chunk(chunk: dict): llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) @@ -72,6 +73,7 @@ def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + tool_event = ToolEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: llm_event.session_id = session.session_id @@ -80,9 +82,9 @@ def handle_stream_agent(chunk: dict): llm_event.returns = chunk.event try: - if chunk.event.payload.event_type == "step_start": + if chunk.event.payload.event_type == "turn_start": pass - elif chunk.event.payload.event_type == "turn_start": + elif chunk.event.payload.event_type == "step_start": pass elif chunk.event.payload.event_type == "step_progress": @@ -97,11 +99,21 @@ def handle_stream_agent(chunk: dict): else: accum_delta = delta elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "started"): - pass + tool_event.name = "ToolExecution - started" + self._safe_record(session, tool_event) elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - pass + nonlocal accum_tool_delta + delta = chunk.event.payload.tool_call_delta.content + 
if accum_tool_delta: + accum_tool_delta += delta + else: + accum_tool_delta = delta elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "success"): - pass + tool_event.name = "ToolExecution - success" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "failure"): + self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": if (chunk.event.payload.step_type == "inference"): @@ -111,10 +123,14 @@ def handle_stream_agent(chunk: dict): llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = metadata.get("model_id", "Unable to identify model") llm_event.prompt_tokens = None - llm_event.completion = accum_delta + llm_event.completion = accum_delta or kwargs["completion"] llm_event.completion_tokens = None llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) + elif (chunk.event.payload.step_type == "tool_execution"): + tool_event.name = "ToolExecution - complete" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": pass From 187963b05c6fe86a97b42bf0e5b565c2bb474dbc Mon Sep 17 00:00:00 2001 From: tad dy Date: Wed, 27 Nov 2024 12:40:39 -0500 Subject: [PATCH 15/69] for completeness --- agentops/llms/__init__.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py index 3c29167c6..b26cd1233 100644 --- a/agentops/llms/__init__.py +++ b/agentops/llms/__init__.py @@ -38,7 +38,7 @@ class LlmTracker: }, "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, "llama_stack_client": { - "0.0.53": ("resources.InferenceResource.chat_completion"), + "0.0.53": ("resources.InferenceResource.chat_completion", "lib.agents.agent.Agent.create_turn"), }, "groq": { "0.9.0": ("Client.chat", "AsyncClient.chat"), From c1a58f289cb2d8f47a540d01805280b8bc157e87 Mon Sep 17 00:00:00 2001 From: reibs Date: Fri, 29 Nov 2024 12:44:52 -0500 Subject: [PATCH 16/69] remove logs --- .../llama_stack_client_examples.ipynb | 63 +------------------ 1 file changed, 2 insertions(+), 61 deletions(-) diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index b4e2d96fa..098fa2f68 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,68 +17,9 @@ }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", - "Requirement 
already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", - "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", - "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", - "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", - "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", - "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: python-dotenv in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U agentops\n", From ac3e01e1262921e9aa30037bae8380a2cad2e571 Mon Sep 17 00:00:00 2001 From: tad dy Date: Fri, 29 Nov 2024 23:35:50 -0500 Subject: [PATCH 17/69] implemeting code review --- agentops/llms/llama_stack_client.py | 8 +- .../llama_stack_client_examples/README.md | 73 ++++++++++++++++--- .../docker-compose.yaml | 57 +++++++++++++++ .../llama-stack-server-config.yaml | 2 +- .../llama_stack_client_examples.ipynb | 68 +++++++++++------ .../llama_stack_client_canary/agent_canary.py | 2 +- 6 files changed, 169 insertions(+), 41 deletions(-) create mode 100644 examples/llama_stack_client_examples/docker-compose.yaml diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 8fbd2b29d..7ed9f14f6 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -3,11 +3,11 @@ import sys from typing import Dict, Optional -from ..event import LLMEvent, ErrorEvent, ToolEvent -from ..session import Session -from ..log_config import logger +from agentops.event import LLMEvent, ErrorEvent, ToolEvent +from agentops.session import Session +from agentops.log_config import logger from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id -from .instrumented_provider import InstrumentedProvider +from agentops.llms.instrumented_provider import InstrumentedProvider class LlamaStackClientProvider(InstrumentedProvider): original_complete = None diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 176b8c8a3..f88666a1a 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -5,35 +5,37 @@ How to set up a Llama Stack server for supporting the `llama_stack_client_exampl ## Disclaimer As of 11/2024, Llama Stack is new and is subject to breaking changes. - Here are Llama Stack's docs: https://llama-stack.readthedocs.io/en/latest/ -## High-level steps +## ToC -https://llama-stack.readthedocs.io/en/latest/getting_started/index.html# +1. Running the Ollama Server and Llama Stack Server on the Host + - a) Download, install, & start Ollama + - b) Start the Llama Stack Server + - c) Call the Llama Stack Server with a Llama Stack Client +2. Running the Ollama Server in a Docker Container -1. Download, install, & start Ollama -2. Start the Llama Stack Server -3. Call the Llama Stack Server with a Llama Stack Client +## Running the Ollama Server and Llama Stack Server on the Host -### 1 - Download, install, & start Ollama +### 1a - Download, install, & start Ollama https://ollama.com/ Ollama has an easy-to-use installer available for macOS, Linux, and Windows. 
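+Once Ollama is installed and serving, a quick sanity check (assuming it is listening on its default port, 11434) is to hit the local API; `/api/tags` just lists whatever models have been pulled so far:
+
+```sh
+curl http://localhost:11434/api/tags
+```
+
+Then pull and run the inference model:
+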
```sh -export OLLAMA_INFERENCE_MODEL="llama3.2:3b-instruct-fp16" +export OLLAMA_INFERENCE_MODEL="llama3.2:1b-instruct-fp16" ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m +ollama run llama3.2:1b --keepalive 60m ``` -### 2 - Start the Llama Stack server +### 1b - Start the Llama Stack server You need to configure the Llama Stack server with a yaml config ie: peep the `llama-stack-server-config.yaml` file. FYI, found this config here: `https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml` ```sh export LLAMA_STACK_PORT=5001 -export INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" +export INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" docker run \ -it \ -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ @@ -46,10 +48,58 @@ docker run \ --env OLLAMA_URL=http://host.docker.internal:11434 ``` -### 3 - Call the Llama Stack Server with a Llama Stack Client +```sh +docker run \ + -it \ + -p 5001:5001 \ + -v ~/.llama:/root/.llama \ + -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ + llamastack/distribution-ollama \ + --yaml-config /root/my-run.yaml \ + --port 5001 \ + --env INFERENCE_MODEL=meta-llama/Llama-3.2-1B \ + --env OLLAMA_URL=http://host.docker.internal:11434 +``` + + +### 1c - Call the Llama Stack Server with a Llama Stack Client ie: Check out the examples in the `llama_stack_client_examples.ipynb` file +## Running the Ollama Server in a Docker Container + +```sh - set up the ollama server +docker-compose -f docker-compose.yaml up +``` + +```sh - download a model +curl -X POST http://localhost:11434/api/pull -d '{"model": "llama3.2:1b"}' +``` + +```sh - test the model +curl http://localhost:11434/api/generate -d '{ + "model": "llama3.2:1b", + "prompt": "Why is the sky blue?" +}' + +curl http://localhost:11434/api/chat -d '{ + "model": "llama3.2:1b", + "messages": [ + { + "role": "user", + "content": "why is the sky blue?" + } + ], + "stream": false +}' +``` + +## 2 - Running the Ollama Server in a Docker Container + +```sh +docker-compose -f docker-compose.yaml up +``` + ## Common Gotchas 1. Models contain different id's w.r.t. Ollama and Llama Stack. 
For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:3b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` @@ -68,3 +118,4 @@ ie: Check out the examples in the `llama_stack_client_examples.ipynb` file - https://github.com/meta-llama/llama-stack - download https://ollama.com/ - https://www.llama.com/docs/getting_the_models/meta/ +- https://llama-stack.readthedocs.io/en/latest/getting_started/index.html diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml new file mode 100644 index 000000000..a4ed6e416 --- /dev/null +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -0,0 +1,57 @@ +version: '3.8' + +services: + # Ollama server service + ollama: + image: ollama/ollama:latest + container_name: ollama_server + ports: + - "11434:11434" # Map Ollama's port to host + volumes: + - ~/.ollama/models:/root/.ollama # Persist data (e.g., downloaded models) + entrypoint: ["ollama", "serve"] # Start the Ollama server + restart: always # Ensure Ollama server restarts on failure + + # Ephemeral service to trigger model download + model_downloader: + image: curlimages/curl:latest # Use a lightweight image with curl + depends_on: + - ollama # Ensure the Ollama server starts first + entrypoint: > + sh -c "sleep 5 && + curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:1b-instruct-fp16\"}'" + restart: "no" # Ensure this service doesn't restart + + + tester: + image: curlimages/curl:latest # Use a lightweight image with curl + depends_on: + - model_downloader # Ensure the Ollama server starts first + entrypoint: > + sh -c "sleep 5 && + curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:1b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" + restart: "no" # Ensure this service doesn't restart + + llama-stack: + image: llamastack/distribution-ollama + container_name: llama_stack_server + ports: + - "5001:5001" + volumes: + - "~/.ollama/models:/root/.ollama" + - "./llama-stack-server-config.yaml:/root/my-run.yaml" + environment: + - INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct + - OLLAMA_URL=http://ollama:11434 + command: > + --yaml-config /root/my-run.yaml + --port 5001 + platform: linux/amd64 + depends_on: + - ollama + - model_downloader + - tester + +networks: + default: + driver: bridge diff --git a/examples/llama_stack_client_examples/llama-stack-server-config.yaml b/examples/llama_stack_client_examples/llama-stack-server-config.yaml index 32137fd67..c51a454eb 100644 --- a/examples/llama_stack_client_examples/llama-stack-server-config.yaml +++ b/examples/llama_stack_client_examples/llama-stack-server-config.yaml @@ -13,7 +13,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + url: ${env.OLLAMA_URL} memory: - provider_id: faiss provider_type: inline::faiss diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 098fa2f68..bfb858863 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -35,7 +35,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 8, "metadata": {}, "outputs": [], "source": [ @@ -56,7 +56,7 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ 
-66,7 +66,7 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": 16, "metadata": {}, "outputs": [ { @@ -99,29 +99,35 @@ }, { "cell_type": "code", - "execution_count": 10, + "execution_count": 18, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "> Response: ChatCompletionResponse(completion_message=CompletionMessage(content='Lunar gentle glow', role='assistant', stop_reason='end_of_turn', tool_calls=[]), logprobs=None)\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f3dde247-e5bd-4d25-ab2a-08612270cb08\u001b[0m\u001b[0m\n" ] }, { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 10.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=a590ad9c-55a3-412b-a365-a6697d1fbfa8\u001b[0m\u001b[0m\n" + "ename": "InternalServerError", + "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[18], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: 
\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:199\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 199\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m 
\u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m 
\u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1261\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1249\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1256\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1257\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1258\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1259\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1260\u001b[0m )\n\u001b[0;32m-> 1261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:953\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 950\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 951\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 953\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 954\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 955\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1056\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1053\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1055\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status 
error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1056\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1058\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1059\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1060\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1064\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1065\u001b[0m )\n", + "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" ] } ], @@ -134,7 +140,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=False\n", ")\n", "\n", @@ -151,29 +157,43 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": 19, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5d7c69df-a2e7-4405-810b-8c9283c30a10\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" + "\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.4s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=aef6931a-9a81-4de5-bf36-70b7e7bd6249\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Could not end session - multiple sessions detected. 
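The "Multiple sessions detected" warnings in the streaming run above show up when more than one AgentOps session is open at once, so the module-level helpers cannot tell which session an event or end-of-session call belongs to. A minimal sketch of the pattern the warning asks for, assuming `agentops.start_session()` returns a `Session` handle as it does in the other examples in this patch:

```python
import agentops

# Keep a handle on the session instead of relying on the module-level helpers.
session = agentops.start_session()
try:
    # ... run the Llama Stack chat_completion / agent calls here ...
    pass
finally:
    # End this specific session; with several sessions open,
    # agentops.end_session("Success") cannot pick one on its own.
    session.end_session(end_state="Success")
```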
You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" ] } ], @@ -186,7 +206,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=True\n", ")\n", "\n", diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 9921c4d69..5f54abeb0 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -24,7 +24,7 @@ LLAMA_STACK_PORT = 5001 -INFERENCE_MODEL="meta-llama/Llama-3.2-3B-Instruct" +INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" async def agent_test(): client = LlamaStackClient( From 8122c3f19b8592fe77bdb1ea0b483b87057ba658 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 03:48:11 -0500 Subject: [PATCH 18/69] saving progress of getting agent monkeypatch tested in the ipynb --- agentops/llms/llama_stack_client.py | 34 +-- .../llama_stack_client_examples.ipynb | 256 +++++++++++++++--- .../inference_canary.py | 22 +- 3 files changed, 253 insertions(+), 59 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 7ed9f14f6..b9ed79ad3 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -98,22 +98,24 @@ def handle_stream_agent(chunk: dict): accum_delta += delta else: accum_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "started"): - tool_event.name = "ToolExecution - started" - self._safe_record(session, tool_event) - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - nonlocal accum_tool_delta - delta = chunk.event.payload.tool_call_delta.content - if accum_tool_delta: - accum_tool_delta += delta - else: - accum_tool_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "success"): - tool_event.name = "ToolExecution - success" - tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta and chunk.event.payload.tool_call_delta.parse_status == "failure"): - self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): + + if (chunk.event.payload.tool_call_delta.parse_status == "started"): + tool_event.name = "ToolExecution - started" + self._safe_record(session, tool_event) + elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + nonlocal accum_tool_delta + delta = chunk.event.payload.tool_call_delta.content + if accum_tool_delta: + accum_tool_delta += delta + else: + accum_tool_delta = delta + elif (chunk.event.payload.tool_call_delta.parse_status == "success"): + tool_event.name = "ToolExecution - success" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) + elif 
(chunk.event.payload.tool_call_delta.parse_status == "failure"): + self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": if (chunk.event.payload.step_type == "inference"): diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index bfb858863..d79971260 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,9 +17,68 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 2, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in 
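The branches above follow the tool-call delta lifecycle reported by the agent stream (`started`, then one or more `in_progress` chunks, then `success` or `failure`). A standalone sketch of the same accumulation logic, using a hypothetical list of `(parse_status, content)` pairs in place of real stream chunks:

```python
# Hypothetical stand-ins for tool_call_delta chunks from a streamed agent turn.
chunks = [
    ("started", ""),
    ("in_progress", '{"query": "NBA western conference '),
    ("in_progress", 'semifinals 2024"}'),
    ("success", ""),
]

accum_tool_delta = None
for parse_status, content in chunks:
    if parse_status == "started":
        accum_tool_delta = None  # a new tool call begins; reset the buffer
    elif parse_status == "in_progress":
        accum_tool_delta = (accum_tool_delta or "") + content  # build up the partial arguments
    elif parse_status == "success":
        print("tool call arguments:", accum_tool_delta)  # record the completed tool event here
    elif parse_status == "failure":
        print("tool call failed after:", accum_tool_delta)  # record an error event here
```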
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", + "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", + "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", + "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", + "Requirement already satisfied: 
PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U agentops\n", @@ -35,16 +94,19 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 27, "metadata": {}, "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.inference.event_logger import EventLogger\n", "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", "from dotenv import load_dotenv\n", "import os\n", - "import agentops" + "import agentops\n", + "import asyncio" ] }, { @@ -54,9 +116,14 @@ "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [] + }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 28, "metadata": {}, "outputs": [], "source": [ @@ -66,17 +133,9 @@ }, { "cell_type": "code", - "execution_count": 16, + "execution_count": 29, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: AgentOps has already been initialized. 
If you are trying to start a session, call agentops.start_session() instead.\n" - ] - } - ], + "outputs": [], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", @@ -99,7 +158,7 @@ }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -132,20 +191,20 @@ } ], "source": [ - "agentops.start_session()\n", - "response = client.inference.chat_completion(\n", - " messages=[\n", - " UserMessage(\n", - " content=\"write me a 3 word poem about the moon\",\n", - " role=\"user\",\n", - " ),\n", - " ],\n", - " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - " stream=False\n", - ")\n", + "# agentops.start_session()\n", + "# response = client.inference.chat_completion(\n", + "# messages=[\n", + "# UserMessage(\n", + "# content=\"write me a 3 word poem about the moon\",\n", + "# role=\"user\",\n", + "# ),\n", + "# ],\n", + "# model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + "# stream=False\n", + "# )\n", "\n", - "print(f\"> Response: {response}\")\n", - "agentops.end_session(\"Success\")" + "# print(f\"> Response: {response}\")\n", + "# agentops.end_session(\"Success\")" ] }, { @@ -157,21 +216,23 @@ }, { "cell_type": "code", - "execution_count": 19, + "execution_count": 84, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5d7c69df-a2e7-4405-810b-8c9283c30a10\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=117bbe62-4f2d-4d33-bec9-ac9374ac6092\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m" + "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", + "\n", + "\u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m" ] }, { @@ -216,6 +277,137 @@ "agentops.end_session(\"Success\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=984ed603-12a9-4c76-95b1-36c327a0b6d4\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No available shields. Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", + "🖇 AgentOps: Unable to parse response for LLM call. 
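The "Unable to parse response for LLM call" warnings around the agent example come from `create_turn` returning a streaming generator rather than a finished response object; as a plain generator it typically does no work until it is consumed. A minimal consumption sketch, assuming an `Agent` and agent session created the same way as in the cell below:

```python
from llama_stack_client.lib.agents.agent import Agent
from llama_stack_client.lib.agents.event_logger import EventLogger


async def run_turn(agent: Agent, session_id: str, prompt: str) -> None:
    response = agent.create_turn(
        messages=[{"role": "user", "content": prompt}],
        session_id=session_id,
    )
    # Iterating the result is what actually drives the turn and streams its events.
    async for log in EventLogger().log(response):
        log.print()
```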
Skipping upload to AgentOps\n", + "response:\n", + " \n", + "kwargs:\n", + " {'messages': [{'content': 'Hello', 'role': 'user'}],\n", + " 'session_id': '08402a4a-7991-4831-b53c-893a809898af'}\n", + "\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Response: \n" + ] + } + ], + "source": [ + "import nest_asyncio\n", + "import asyncio\n", + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "# Apply nest_asyncio to handle nested event loops\n", + "# nest_asyncio.apply()\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://localhost:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": os.getenv(\"BRAVE_SEARCH_API_KEY\"),\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(\"Response: \", response)\n", + "\n", + " async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "\n", + "def main():\n", + " agentops.start_session()\n", + " asyncio.run(agent_test())\n", + " agentops.end_session(\"Success\")\n", + "\n", + "main()" + ] + }, { "cell_type": "code", "execution_count": null, diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py index dc4382f81..38dec66cc 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py +++ 
b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py @@ -32,7 +32,7 @@ async def stream_test(): role="user", ), ], - model_id="meta-llama/Llama-3.2-3B-Instruct", + model_id="meta-llama/Llama-3.2-1B-Instruct", stream=True, ) @@ -43,16 +43,16 @@ async def stream_test(): def main(): agentops.start_session() - client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-3B-Instruct", - stream=False, - ) + # client.inference.chat_completion( + # messages=[ + # UserMessage( + # content="hello world, write me a 3 word poem about the moon", + # role="user", + # ), + # ], + # model_id="meta-llama/Llama-3.2-1B-Instruct", + # stream=False, + # ) asyncio.run(stream_test()) agentops.end_session(end_state="Success") From b131246bd55feb247d6d495a4e5803f22fb792f8 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 07:57:26 -0500 Subject: [PATCH 19/69] saving testing scaffold and preliminary fireworks setup/support --- .../README.fireworks.md | 9 +++ .../fireworks-compose.yaml | 16 +++++ .../fireworks-server-config.yaml | 59 +++++++++++++++++++ .../llama_stack_client_examples.ipynb | 51 +++++++--------- tests/llama_stack/test_llama_stack.py | 57 ++++++++++++++++++ 5 files changed, 161 insertions(+), 31 deletions(-) create mode 100644 examples/llama_stack_client_examples/README.fireworks.md create mode 100644 examples/llama_stack_client_examples/fireworks-compose.yaml create mode 100644 examples/llama_stack_client_examples/fireworks-server-config.yaml create mode 100644 tests/llama_stack/test_llama_stack.py diff --git a/examples/llama_stack_client_examples/README.fireworks.md b/examples/llama_stack_client_examples/README.fireworks.md new file mode 100644 index 000000000..412821953 --- /dev/null +++ b/examples/llama_stack_client_examples/README.fireworks.md @@ -0,0 +1,9 @@ +## + +https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml + +## + +```sh +docker-compose -f fireworks-server-config.yaml up +``` diff --git a/examples/llama_stack_client_examples/fireworks-compose.yaml b/examples/llama_stack_client_examples/fireworks-compose.yaml new file mode 100644 index 000000000..fcac78a29 --- /dev/null +++ b/examples/llama_stack_client_examples/fireworks-compose.yaml @@ -0,0 +1,16 @@ +services: + llamastack: + image: llamastack/distribution-fireworks + network_mode: "host" + volumes: + - ~/.llama:/root/.llama + - ./run.yaml:/root/llamastack-run-fireworks.yaml + ports: + - "5000:5000" + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-fireworks.yaml" + deploy: + restart_policy: + condition: on-failure + delay: 3s + max_attempts: 5 + window: 60s \ No newline at end of file diff --git a/examples/llama_stack_client_examples/fireworks-server-config.yaml b/examples/llama_stack_client_examples/fireworks-server-config.yaml new file mode 100644 index 000000000..2f8f8429e --- /dev/null +++ b/examples/llama_stack_client_examples/fireworks-server-config.yaml @@ -0,0 +1,59 @@ +version: '2' +image_name: fireworks +docker_image: null +conda_env: fireworks +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: fireworks + provider_type: remote::fireworks + config: + url: "https://api.fireworks.ai/inference" + api_key: "fw_3ZVeWz59L6eAVPG1GRnCm7wW" + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: 
sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db +models: +- metadata: {} + model_id: meta-llama/Llama-3.2-1B-Instruct + provider_id: null + provider_model_id: fireworks/llama-v3p2-1b-instruct +shields: +- params: null + shield_id: meta-llama/Llama-Guard-3-8B + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index d79971260..ccf768932 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -279,24 +279,9 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 54, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=984ed603-12a9-4c76-95b1-36c327a0b6d4\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n" - ] - }, { "name": "stderr", "output_type": "stream", @@ -305,10 +290,22 @@ "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", "response:\n", - " \n", + " \n", "kwargs:\n", " {'messages': [{'content': 'Hello', 'role': 'user'}],\n", - " 'session_id': '08402a4a-7991-4831-b53c-893a809898af'}\n", + " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", + "\n", + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", + "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", + "response:\n", + " \n", + "kwargs:\n", + " {'messages': [{'content': 'Which players played in the winning team of the NBA '\n", + " 'western conference semifinals of 2024, please use '\n", + " 'tools',\n", + " 'role': 'user'}],\n", + " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", "\n" ] }, @@ -316,13 +313,14 @@ "name": "stdout", "output_type": "stream", "text": [ - "Response: \n" + "No available shields. 
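Once the Fireworks-backed distribution configured above is running (the accompanying compose file exposes port 5000), a quick sanity check is to list the registered models through the client. The base URL and the expected `meta-llama/Llama-3.2-1B-Instruct` entry are assumptions taken from this config and may differ in other setups:

```python
from llama_stack_client import LlamaStackClient

# Port 5000 matches the fireworks compose file; adjust if the port mapping changes.
client = LlamaStackClient(base_url="http://localhost:5000")

# The run config registers meta-llama/Llama-3.2-1B-Instruct against the
# fireworks provider, so it should show up in this listing.
for model in client.models.list():
    print(model.identifier)
```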
Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n", + "Response: \n", + "Response: \n" ] } ], "source": [ - "import nest_asyncio\n", - "import asyncio\n", "import os\n", "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.agents.agent import Agent\n", @@ -396,16 +394,7 @@ "\n", " print(\"Response: \", response)\n", "\n", - " async for log in EventLogger().log(response):\n", - " log.print()\n", - "\n", - "\n", - "def main():\n", - " agentops.start_session()\n", - " asyncio.run(agent_test())\n", - " agentops.end_session(\"Success\")\n", - "\n", - "main()" + "await agent_test()" ] }, { diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py new file mode 100644 index 000000000..b93a18567 --- /dev/null +++ b/tests/llama_stack/test_llama_stack.py @@ -0,0 +1,57 @@ +import pytest +import requests_mock +import time + +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + + +@pytest.fixture(autouse=True) +def setup_teardown(): + yield + + +@pytest.fixture(autouse=True, scope="function") +def mock_req(): + with requests_mock.Mocker() as m: + url = "http://localhost:5001" + m.post(url + "/v2/create_events", json={"status": "ok"}) + m.post(url + "/v2/create_session", json={"status": "success", "jwt": "some_jwt"}) + + yield m + + +class TestLlamaStack: + def setup_method(self): + + print("...Setting up LlamaStackClient...") + + host = "0.0.0.0" # LLAMA_STACK_HOST + port = 5001 # LLAMA_STACK_PORT + + full_host = f"http://{host}:{port}" + + self.client = LlamaStackClient( + base_url=f"{full_host}", + ) + + + def test_llama_stack_inference(self, mock_req): + + response = self.client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, + ) + + # async for log in EventLogger().log(response): + # log.print() + + print(response) + From ae572ba6d15481ddab6ab6653457ef5a368cde8d Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 08:14:39 -0500 Subject: [PATCH 20/69] remove Fireworks API key --- .../llama_stack_client_examples/fireworks-server-config.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/fireworks-server-config.yaml b/examples/llama_stack_client_examples/fireworks-server-config.yaml index 2f8f8429e..cb9dd2cbc 100644 --- a/examples/llama_stack_client_examples/fireworks-server-config.yaml +++ b/examples/llama_stack_client_examples/fireworks-server-config.yaml @@ -14,7 +14,7 @@ providers: provider_type: remote::fireworks config: url: "https://api.fireworks.ai/inference" - api_key: "fw_3ZVeWz59L6eAVPG1GRnCm7wW" + api_key: "" memory: - provider_id: faiss provider_type: inline::faiss From 0a12c5c3cdc2f16830a29ad7995bf7d3426f350b Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 08:18:52 -0500 Subject: [PATCH 21/69] removing uneeded global --- agentops/llms/llama_stack_client.py | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index b9ed79ad3..25bb65ff9 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -207,8 +207,7 @@ def patched_function(*args, **kwargs): def _override_create_turn(self): from llama_stack_client.lib.agents.agent import Agent 
- global original_create_turn - original_create_turn = Agent.create_turn + self.original_create_turn = Agent.create_turn def patched_function(*args, **kwargs): # Call the original function with its original arguments @@ -216,7 +215,7 @@ def patched_function(*args, **kwargs): session = kwargs.get("session", None) if "session" in kwargs.keys(): del kwargs["session"] - result = original_create_turn(*args, **kwargs) + result = self.original_create_turn(*args, **kwargs) return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) # Override the original method with the patched one From 9a43d74cd95c8bcabd21984bfd33d8f883075dbf Mon Sep 17 00:00:00 2001 From: Teo Date: Sat, 30 Nov 2024 12:28:40 -0600 Subject: [PATCH 22/69] enhance(compose): remove deprecate version attr Signed-off-by: Teo --- examples/llama_stack_client_examples/docker-compose.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml index a4ed6e416..ae5362ab3 100644 --- a/examples/llama_stack_client_examples/docker-compose.yaml +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -1,5 +1,3 @@ -version: '3.8' - services: # Ollama server service ollama: From 13950fcd6e1b4efc5c4b64c82aad8e414df8bb81 Mon Sep 17 00:00:00 2001 From: Teo Date: Sat, 30 Nov 2024 12:42:36 -0600 Subject: [PATCH 23/69] Removing some redundancies Signed-off-by: Teo --- .../llama_stack_client_canary/agent_canary.py | 24 ++++++------------- 1 file changed, 7 insertions(+), 17 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 5f54abeb0..b466f45d0 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -7,25 +7,18 @@ from llama_stack_client.types import Attachment from llama_stack_client.types.agent_create_params import AgentConfig -import os -import fire -from llama_stack_client import LlamaStackClient -from llama_stack_client.lib.agents.agent import Agent -from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types.agent_create_params import AgentConfig - import agentops +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" + # import debugpy # debugpy.listen(5678) # debugpy.wait_for_client() -agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) +agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) -LLAMA_STACK_PORT = 5001 -INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" - async def agent_test(): client = LlamaStackClient( base_url=f"http://localhost:{LLAMA_STACK_PORT}", @@ -89,9 +82,6 @@ async def agent_test(): log.print() -def main(): - agentops.start_session() - asyncio.run(agent_test()) - agentops.end_session("Success") - -main() +agentops.start_session() +asyncio.run(agent_test()) +agentops.end_session("Success") From fe06a44e7381817acbecd0accef50306562c3405 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 14:18:33 -0500 Subject: [PATCH 24/69] saving tweak to custom docker-compose.yaml for llama stack --- .../llama_stack_client_examples/README.md | 2 +- .../docker-compose.yaml | 6 +- .../llama_stack_client_examples.ipynb | 63 
+++++++++++++--- .../llama_stack_ollama/README.md | 4 + .../llama_stack_ollama/compose.yaml | 73 +++++++++++++++++++ .../llama_stack_ollama/pull-models.sh | 18 +++++ .../llama_stack_ollama/run-with-safety.yaml | 62 ++++++++++++++++ .../llama_stack_ollama/run.yaml | 54 ++++++++++++++ 8 files changed, 267 insertions(+), 15 deletions(-) create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/README.md create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml create mode 100755 examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml create mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/run.yaml diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index f88666a1a..d560733f4 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -102,7 +102,7 @@ docker-compose -f docker-compose.yaml up ## Common Gotchas -1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:3b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` +1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:1b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` ## Useful ollama commands diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml index ae5362ab3..4ba388ab4 100644 --- a/examples/llama_stack_client_examples/docker-compose.yaml +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -17,7 +17,7 @@ services: - ollama # Ensure the Ollama server starts first entrypoint: > sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:1b-instruct-fp16\"}'" + curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" restart: "no" # Ensure this service doesn't restart @@ -27,7 +27,7 @@ services: - model_downloader # Ensure the Ollama server starts first entrypoint: > sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:1b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" + curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" restart: "no" # Ensure this service doesn't restart llama-stack: @@ -39,7 +39,7 @@ services: - "~/.ollama/models:/root/.ollama" - "./llama-stack-server-config.yaml:/root/my-run.yaml" environment: - - INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct + - INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct - OLLAMA_URL=http://ollama:11434 command: > --yaml-config /root/my-run.yaml diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index ccf768932..917a5a852 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,9 +17,16 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 59, "metadata": {}, "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "41840.28s - pydevd: Sending message related to process being 
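To see the naming mismatch called out in the gotcha above, both servers can be asked for their model lists directly; the ports match the compose file, and the exact identifiers returned depend on which models have actually been pulled:

```python
import requests
from llama_stack_client import LlamaStackClient

# Ollama's native API reports names such as "llama3.2:3b-instruct-fp16" ...
tags = requests.get("http://localhost:11434/api/tags", timeout=10).json()
print("Ollama:", [m["name"] for m in tags.get("models", [])])

# ... while the Llama Stack distribution registers the same weights as
# "meta-llama/Llama-3.2-3B-Instruct".
client = LlamaStackClient(base_url="http://localhost:5001")
print("Llama Stack:", [m.identifier for m in client.models.list()])
```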
replaced timed-out after 5 seconds\n" + ] + }, { "name": "stdout", "output_type": "stream", @@ -56,7 +63,20 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "41847.06s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", @@ -70,7 +90,20 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "41853.46s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", @@ -94,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": 60, "metadata": {}, "outputs": [], "source": [ @@ -123,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": 61, "metadata": {}, "outputs": [], "source": [ @@ -133,9 +166,17 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": 62, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: AgentOps has already been initialized. 
If you are trying to start a session, call agentops.start_session() instead.\n" + ] + } + ], "source": [ "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", @@ -216,14 +257,14 @@ }, { "cell_type": "code", - "execution_count": 84, + "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=117bbe62-4f2d-4d33-bec9-ac9374ac6092\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=16acb375-5b2d-4c7d-a086-276a333ffad4\u001b[0m\u001b[0m\n" ] }, { @@ -232,7 +273,7 @@ "text": [ "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", "\n", - "\u001b[0m\u001b[33mL\u001b[0m\u001b[33munar\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m glow\u001b[0m" + "\u001b[0m\u001b[33m\"M\u001b[0m\u001b[33moon\u001b[0m\u001b[33m's\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m silver\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m\"\u001b[0m" ] }, { @@ -267,7 +308,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", " stream=True\n", ")\n", "\n", diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/README.md b/examples/llama_stack_client_examples/llama_stack_ollama/README.md new file mode 100644 index 000000000..751820d8a --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/README.md @@ -0,0 +1,4 @@ + +chmod +x pull_models.sh + +docker-compose -f compose.yaml up \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml b/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml new file mode 100644 index 000000000..9eff1970d --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml @@ -0,0 +1,73 @@ +services: + ollama: + image: ollama/ollama:latest + container_name: ollama + network_mode: ${NETWORK_MODE:-bridge} + volumes: + - ~/.ollama:/root/.ollama + ports: + - "11434:11434" + environment: + OLLAMA_DEBUG: 1 + command: [] + deploy: + resources: + limits: + memory: 8G # Set maximum memory + reservations: + memory: 8G # Set minimum memory reservation + # healthcheck: + # # ugh, no CURL in ollama image + # test: ["CMD", "curl", "-f", "http://ollama:11434"] + # interval: 10s + # timeout: 5s + # retries: 5 + + ollama-init: + image: ollama/ollama:latest + depends_on: + - ollama + # condition: service_healthy + network_mode: ${NETWORK_MODE:-bridge} + container_name: ollama-init + environment: + - OLLAMA_HOST=ollama + - INFERENCE_MODEL=${INFERENCE_MODEL} + - SAFETY_MODEL=${SAFETY_MODEL:-} + volumes: + - ~/.ollama:/root/.ollama + - ./pull-models.sh:/root/pull-models.sh + entrypoint: ["/root/pull-models.sh"] + + llamastack: + depends_on: + ollama: + condition: service_started + ollama-init: + condition: service_started + image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} + network_mode: ${NETWORK_MODE:-bridge} + volumes: + - ~/.llama:/root/.llama + # Link to ollama run.yaml file + - ~/local/llama-stack/:/app/llama-stack-source + - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml + ports: + - 
"${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" + environment: + - INFERENCE_MODEL=${INFERENCE_MODEL} + - SAFETY_MODEL=${SAFETY_MODEL:-} + - OLLAMA_URL=http://ollama:11434 + entrypoint: > + python -m llama_stack.distribution.server.server /root/my-run.yaml \ + --port ${LLAMA_STACK_PORT:-5001} + deploy: + restart_policy: + condition: on-failure + delay: 10s + max_attempts: 3 + window: 60s +volumes: + ollama: + ollama-init: + llamastack: \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh b/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh new file mode 100755 index 000000000..cd0690290 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh @@ -0,0 +1,18 @@ +#!/bin/sh + +# Copyright (c) Meta Platforms, Inc. and affiliates. +# All rights reserved. +# +# This source code is licensed under the terms described in the LICENSE file in +# the root directory of this source tree. + +echo "Preloading (${INFERENCE_MODEL}, ${SAFETY_MODEL})..." +for model in ${INFERENCE_MODEL} ${SAFETY_MODEL}; do + echo "Preloading $model..." + if ! ollama run "$model"; then + echo "Failed to pull and run $model" + exit 1 + fi +done + +echo "All models pulled successfully" \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml b/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml new file mode 100644 index 000000000..2e4f6ac8a --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml @@ -0,0 +1,62 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +- metadata: {} + model_id: ${env.SAFETY_MODEL} + provider_id: ollama + provider_model_id: null +shields: +- params: null + shield_id: ${env.SAFETY_MODEL} + provider_id: null + provider_shield_id: null +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml b/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml new file mode 100644 index 000000000..32137fd67 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml @@ -0,0 +1,54 @@ +version: '2' +image_name: ollama +docker_image: null +conda_env: ollama +apis: +- agents +- inference +- memory +- safety +- telemetry +providers: + inference: + - provider_id: ollama + 
provider_type: remote::ollama + config: + url: ${env.OLLAMA_URL:http://localhost:11434} + memory: + - provider_id: faiss + provider_type: inline::faiss + config: + kvstore: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db + safety: + - provider_id: llama-guard + provider_type: inline::llama-guard + config: {} + agents: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: + persistence_store: + type: sqlite + namespace: null + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db + telemetry: + - provider_id: meta-reference + provider_type: inline::meta-reference + config: {} +metadata_store: + namespace: null + type: sqlite + db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db +models: +- metadata: {} + model_id: ${env.INFERENCE_MODEL} + provider_id: ollama + provider_model_id: null +shields: [] +memory_banks: [] +datasets: [] +scoring_fns: [] +eval_tasks: [] \ No newline at end of file From 65a5ab4fdcf310326f191d4b870d4f553591e3ea Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 30 Nov 2024 15:08:22 -0500 Subject: [PATCH 25/69] saving solid docker-compose for spinning up ollama with a llama-stack --- .../llama_stack_client_examples/README.md | 2 + .../docker-compose.yaml | 68 +++++++++++++------ .../llama_stack_client_examples.ipynb | 56 ++++++++------- .../{llama_stack_ollama => }/pull-models.sh | 0 4 files changed, 78 insertions(+), 48 deletions(-) rename examples/llama_stack_client_examples/{llama_stack_ollama => }/pull-models.sh (100%) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index d560733f4..e114cea40 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -104,6 +104,8 @@ docker-compose -f docker-compose.yaml up 1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:1b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` +2. 
Docker will likely need more system memory resources allocated to it + ## Useful ollama commands - `ollama list` diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker-compose.yaml index 4ba388ab4..76684f37b 100644 --- a/examples/llama_stack_client_examples/docker-compose.yaml +++ b/examples/llama_stack_client_examples/docker-compose.yaml @@ -5,32 +5,66 @@ services: container_name: ollama_server ports: - "11434:11434" # Map Ollama's port to host + environment: + OLLAMA_DEBUG: 1 volumes: - ~/.ollama/models:/root/.ollama # Persist data (e.g., downloaded models) + deploy: + resources: + limits: + memory: 16G # Set maximum memory + reservations: + memory: 12G # Set minimum memory reservation entrypoint: ["ollama", "serve"] # Start the Ollama server restart: always # Ensure Ollama server restarts on failure + healthcheck: + # ugh, no CURL in ollama image + test: ["CMD", "curl", "-f", "http://ollama:11434"] + interval: 10s + timeout: 5s + retries: 5 # Ephemeral service to trigger model download - model_downloader: - image: curlimages/curl:latest # Use a lightweight image with curl + # model_downloader: + # image: curlimages/curl:latest # Use a lightweight image with curl + # depends_on: + # - ollama # Ensure the Ollama server starts first + # entrypoint: > + # sh -c "sleep 5 && + # curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" + # restart: "no" # Ensure this service doesn't restart + + ollama-init: + image: ollama/ollama:latest depends_on: - - ollama # Ensure the Ollama server starts first - entrypoint: > - sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" - restart: "no" # Ensure this service doesn't restart + ollama: + condition: service_started + network_mode: bridge + container_name: ollama-init + environment: + - OLLAMA_HOST=host.docker.internal + - INFERENCE_MODEL=llama3.2:3b-instruct-fp16 + volumes: + - ~/.ollama:/root/.ollama + - ./pull-models.sh:/root/pull-models.sh + entrypoint: ["/root/pull-models.sh"] - tester: - image: curlimages/curl:latest # Use a lightweight image with curl - depends_on: - - model_downloader # Ensure the Ollama server starts first - entrypoint: > - sh -c "sleep 5 && - curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" - restart: "no" # Ensure this service doesn't restart + # tester: + # image: curlimages/curl:latest # Use a lightweight image with curl + # depends_on: + # - model_downloader # Ensure the Ollama server starts first + # entrypoint: > + # sh -c "sleep 5 && + # curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" + # restart: "no" # Ensure this service doesn't restart llama-stack: + depends_on: + ollama: + condition: service_started + ollama-init: + condition: service_started image: llamastack/distribution-ollama container_name: llama_stack_server ports: @@ -45,10 +79,6 @@ services: --yaml-config /root/my-run.yaml --port 5001 platform: linux/amd64 - depends_on: - - ollama - - model_downloader - - tester networks: default: diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index 917a5a852..f790aac6b 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ 
b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,14 +17,14 @@ }, { "cell_type": "code", - "execution_count": 59, + "execution_count": 64, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "41840.28s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + "44374.39s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" ] }, { @@ -70,7 +70,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "41847.06s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + "44382.44s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" ] }, { @@ -97,7 +97,7 @@ "name": "stderr", "output_type": "stream", "text": [ - "41853.46s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" + "44389.50s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" ] }, { @@ -127,7 +127,7 @@ }, { "cell_type": "code", - "execution_count": 60, + "execution_count": 65, "metadata": {}, "outputs": [], "source": [ @@ -156,7 +156,7 @@ }, { "cell_type": "code", - "execution_count": 61, + "execution_count": 66, "metadata": {}, "outputs": [], "source": [ @@ -166,7 +166,7 @@ }, { "cell_type": "code", - "execution_count": 62, + "execution_count": null, "metadata": {}, "outputs": [ { @@ -199,14 +199,14 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 71, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f3dde247-e5bd-4d25-ab2a-08612270cb08\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=165f74ba-2f5d-42d3-957f-b6f175dc2471\u001b[0m\u001b[0m\n" ] }, { @@ -216,8 +216,8 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[18], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:199\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 197\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 198\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 199\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "Cell \u001b[0;32mIn[71], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-3B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m 
\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:201\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 201\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers 
\u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m 
\u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1261\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1249\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1256\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1257\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1258\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1259\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 
1260\u001b[0m )\n\u001b[0;32m-> 1261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", @@ -232,20 +232,20 @@ } ], "source": [ - "# agentops.start_session()\n", - "# response = client.inference.chat_completion(\n", - "# messages=[\n", - "# UserMessage(\n", - "# content=\"write me a 3 word poem about the moon\",\n", - "# role=\"user\",\n", - "# ),\n", - "# ],\n", - "# model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - "# stream=False\n", - "# )\n", + "agentops.start_session()\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " stream=False\n", + ")\n", "\n", - "# print(f\"> Response: {response}\")\n", - "# agentops.end_session(\"Success\")" + "print(f\"> Response: {response}\")\n", + "agentops.end_session(\"Success\")" ] }, { @@ -257,23 +257,21 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 68, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=16acb375-5b2d-4c7d-a086-276a333ffad4\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5e22565f-ce52-4eba-9de7-65898f52afc1\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", - "\n", - "\u001b[0m\u001b[33m\"M\u001b[0m\u001b[33moon\u001b[0m\u001b[33m's\u001b[0m\u001b[33m gentle\u001b[0m\u001b[33m silver\u001b[0m\u001b[33m glow\u001b[0m\u001b[33m\"\u001b[0m" + "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m beacon\u001b[0m\u001b[33m shines\u001b[0m\u001b[33m\"\u001b[0m" ] }, { diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh b/examples/llama_stack_client_examples/pull-models.sh similarity index 100% rename from examples/llama_stack_client_examples/llama_stack_ollama/pull-models.sh rename to examples/llama_stack_client_examples/pull-models.sh From 0114ede7dbda8da9bce6098214cf39c9f7d3e0ff Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 11:15:57 -0500 Subject: [PATCH 26/69] adding documentation for Llama Stack integration --- .gitignore | 4 +- README.md | 8 ++++ docs/mint.json | 1 + docs/v1/integrations/llama_stack.mdx | 66 ++++++++++++++++++++++++++++ 4 files changed, 78 insertions(+), 1 deletion(-) create mode 100644 docs/v1/integrations/llama_stack.mdx diff --git a/.gitignore b/.gitignore index 4db649aab..d6ab56734 100644 --- a/.gitignore +++ b/.gitignore @@ -164,4 +164,6 @@ cython_debug/ .DS_Store 
agentops_time_travel.json -.agentops_time_travel.yaml \ No newline at end of file +.agentops_time_travel.yaml + +node_modules \ No newline at end of file diff --git a/README.md b/README.md index 264c5bc4a..e87981dfa 100644 --- a/README.md +++ b/README.md @@ -574,6 +574,14 @@ Check out the [LlamaIndex docs](https://docs.llamaindex.ai/en/stable/module_guid +### Llama Stack 🦙🥞 + +AgentOps provides support for Llama Stack Python Client(>=0.0.53), allowing you to monitor your Agentic applications. + +- [AgentOps integration example 1](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-fdddf65549f3714f8f007ce7dfd1cde720329fe54155d54389dd50fbd81813cb) +- [AgentOps integration example 2](https://github.com/AgentOps-AI/agentops/pull/530/files/65a5ab4fdcf310326f191d4b870d4f553591e3ea#diff-6688ff4fb7ab1ce7b1cc9b8362ca27264a3060c16737fb1d850305787a6e3699) +- [Official Llama Stack Python Client](https://github.com/meta-llama/llama-stack-client-python) + ## Time travel debugging 🔮
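
For the "Llama Stack 🦙🥞" README section added above, here is a minimal usage sketch of the monitored pattern it describes, mirroring the example notebook in this PR (assumptions: a local Llama Stack server at `http://localhost:5001`, an `AGENTOPS_API_KEY` environment variable, and an illustrative model id — use whatever model your distribution actually serves):

```python
# Sketch only: monitor a Llama Stack chat completion with AgentOps.
import os

import agentops
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage

# auto_start_session=False lets us scope a single session around the inference call
agentops.init(
    os.getenv("AGENTOPS_API_KEY"),
    default_tags=["llama-stack-client-example"],
    auto_start_session=False,
)
client = LlamaStackClient(base_url="http://localhost:5001")

agentops.start_session()
response = client.inference.chat_completion(
    messages=[
        UserMessage(content="write me a 3 word poem about the moon", role="user"),
    ],
    model_id="meta-llama/Llama-3.2-3B-Instruct",  # illustrative; must match a model registered with the server
    stream=False,
)
print(f"> Response: {response}")
agentops.end_session("Success")
```

After the run, AgentOps prints a clickable Session Replay URL to the console that links directly to the recorded session in the dashboard.
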
diff --git a/docs/mint.json b/docs/mint.json index 45e61b450..ddde98a84 100644 --- a/docs/mint.json +++ b/docs/mint.json @@ -93,6 +93,7 @@ "v1/integrations/cohere", "v1/integrations/anthropic", "v1/integrations/ollama", + "v1/integrations/llama_stack", "v1/integrations/litellm", "v1/integrations/multion", "v1/integrations/rest" diff --git a/docs/v1/integrations/llama_stack.mdx b/docs/v1/integrations/llama_stack.mdx new file mode 100644 index 000000000..163a4ca8e --- /dev/null +++ b/docs/v1/integrations/llama_stack.mdx @@ -0,0 +1,66 @@ +--- +title: 'Llama Stack' +description: '[Llama Stack](https://llama-stack.readthedocs.io/) is a framework for building Agentic applications.' +--- + +import CodeTooltip from '/snippets/add-code-tooltip.mdx' +import EnvTooltip from '/snippets/add-env-tooltip.mdx' + +AgentOps has built an integration with Llama Stack to make monitoring applications that leverage [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) simple. + +Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. + +## Adding AgentOps to Llama Stack applications + + + + + ```bash pip + pip install agentops + ``` + ```bash poetry + poetry add agentops + ``` + + + + + + + ```python python + import agentops + agentops.init() + ``` + + + + + + ```python .env + AGENTOPS_API_KEY= + ``` + + Read more about environment variables in [Advanced Configuration](/v1/usage/advanced-configuration) + + + + Execute your program and visit [app.agentops.ai/drilldown](https://app.agentops.ai/drilldown) to observe your waterfall! 🕵️ + + After your run, AgentOps prints a clickable url to console linking directly to your session in the Dashboard + + + + +## Llama Stack + AgentOps Examples + + + + + + + + + + + + From fa800999d56485e47a23684c8666caf3725cbd8a Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 11:41:39 -0500 Subject: [PATCH 27/69] rename compose.yaml files to follow the standard docker compose format --- .../README.fireworks.md | 9 --------- examples/llama_stack_client_examples/README.md | 4 ++-- ...{docker-compose.yaml => docker.compose.yaml} | 0 .../llama_stack_fireworks/README.fireworks.md | 17 +++++++++++++++++ .../fireworks-server-config.yaml | 0 .../fireworks.compose.yaml} | 0 6 files changed, 19 insertions(+), 11 deletions(-) delete mode 100644 examples/llama_stack_client_examples/README.fireworks.md rename examples/llama_stack_client_examples/{docker-compose.yaml => docker.compose.yaml} (100%) create mode 100644 examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md rename examples/llama_stack_client_examples/{ => llama_stack_fireworks}/fireworks-server-config.yaml (100%) rename examples/llama_stack_client_examples/{fireworks-compose.yaml => llama_stack_fireworks/fireworks.compose.yaml} (100%) diff --git a/examples/llama_stack_client_examples/README.fireworks.md b/examples/llama_stack_client_examples/README.fireworks.md deleted file mode 100644 index 412821953..000000000 --- a/examples/llama_stack_client_examples/README.fireworks.md +++ /dev/null @@ -1,9 +0,0 @@ -## - -https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml - -## - -```sh -docker-compose -f fireworks-server-config.yaml up -``` diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index e114cea40..3e88eea99 100644 --- 
a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -69,7 +69,7 @@ ie: Check out the examples in the `llama_stack_client_examples.ipynb` file ## Running the Ollama Server in a Docker Container ```sh - set up the ollama server -docker-compose -f docker-compose.yaml up +docker-compose -f docker.compose.yaml up ``` ```sh - download a model @@ -97,7 +97,7 @@ curl http://localhost:11434/api/chat -d '{ ## 2 - Running the Ollama Server in a Docker Container ```sh -docker-compose -f docker-compose.yaml up +docker-compose -f docker.compose.yaml up ``` ## Common Gotchas diff --git a/examples/llama_stack_client_examples/docker-compose.yaml b/examples/llama_stack_client_examples/docker.compose.yaml similarity index 100% rename from examples/llama_stack_client_examples/docker-compose.yaml rename to examples/llama_stack_client_examples/docker.compose.yaml diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md new file mode 100644 index 000000000..899bfbbeb --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md @@ -0,0 +1,17 @@ +# TLDR + +Here are the links of where to find Fireworks integration examples in the Llama Stack repository + +*Disclaimer: This has been tested but not shown to ever work end-2-end* + +## + +https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml +https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/fireworks + + +## + +```sh +docker-compose -f fireworks.compose.yaml up +``` diff --git a/examples/llama_stack_client_examples/fireworks-server-config.yaml b/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml similarity index 100% rename from examples/llama_stack_client_examples/fireworks-server-config.yaml rename to examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml diff --git a/examples/llama_stack_client_examples/fireworks-compose.yaml b/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml similarity index 100% rename from examples/llama_stack_client_examples/fireworks-compose.yaml rename to examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml From b1e433581f0643aaacf77868225b84bb4ba27b20 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 11:52:49 -0500 Subject: [PATCH 28/69] minor tweaks --- .../llama_stack_fireworks/README.fireworks.md | 1 - 1 file changed, 1 deletion(-) diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md index 899bfbbeb..e8432b453 100644 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md +++ b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md @@ -9,7 +9,6 @@ Here are the links of where to find Fireworks integration examples in the Llama https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/fireworks - ## ```sh From dd27a377e26f46f342d145597d53edd387214178 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 12:07:21 -0500 Subject: [PATCH 29/69] add disclaimer in the Fireworks docker compose file --- .../llama_stack_fireworks/README.fireworks.md | 2 +- 1 file changed, 1 
insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md index e8432b453..75c10f74b 100644 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md +++ b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md @@ -2,7 +2,7 @@ Here are the links of where to find Fireworks integration examples in the Llama Stack repository -*Disclaimer: This has been tested but not shown to ever work end-2-end* +*Disclaimer: This Llama Stack Server + Fireworks setup has been tested but NOT shown to ever work end-2-end* ## From 9c4ab6e28ad076d4d585724765d15b19b898ec6c Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 16:16:29 -0500 Subject: [PATCH 30/69] pushing for Alex --- agentops/llms/llama_stack_client.py | 49 +++- .../llama_stack_client_examples/README.md | 1 + .../llama_stack_client_examples.ipynb | 271 +++++++++--------- .../llama_stack_client_canary/agent_canary.py | 8 +- 4 files changed, 177 insertions(+), 152 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 25bb65ff9..db19a2ebc 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -1,7 +1,7 @@ import inspect import pprint import sys -from typing import Dict, Optional +from typing import Any, AsyncGenerator, Dict, Optional from agentops.event import LLMEvent, ErrorEvent, ToolEvent from agentops.session import Session @@ -15,6 +15,7 @@ class LlamaStackClientProvider(InstrumentedProvider): def __init__(self, client): + print("_!_!_ LlamaStackClientProvider _!_!_") super().__init__(client) self._provider_name = "LlamaStack" @@ -23,9 +24,13 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se try: accum_delta = None accum_tool_delta = None + tool_event = None + llm_event = None def handle_stream_chunk(chunk: dict): - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: llm_event.session_id = session.session_id @@ -72,19 +77,21 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - tool_event = ToolEvent(init_timestamp=init_timestamp, params=kwargs) + # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + + nonlocal llm_event if session is not None: llm_event.session_id = session.session_id - if llm_event.returns is None: + if getattr(llm_event, 'returns', None): llm_event.returns = chunk.event - try: if chunk.event.payload.event_type == "turn_start": pass elif chunk.event.payload.event_type == "step_start": + print("step_start") + llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) pass elif chunk.event.payload.event_type == "step_progress": @@ -101,9 +108,15 @@ def handle_stream_agent(chunk: dict): elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): if (chunk.event.payload.tool_call_delta.parse_status == "started"): + print('ToolExecution - started') + nonlocal 
tool_event + tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) + tool_event.name = "ToolExecution - started" - self._safe_record(session, tool_event) + tool_event.init_timestamp = get_ISO_time() + # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + print('ToolExecution - progress') nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: @@ -111,14 +124,22 @@ def handle_stream_agent(chunk: dict): else: accum_tool_delta = delta elif (chunk.event.payload.tool_call_delta.parse_status == "success"): + print('ToolExecution - success') tool_event.name = "ToolExecution - success" tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) + tool_event.end_timestamp = get_ISO_time() + # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + tool_event.name = "ToolExecution - failure" + tool_event.end_timestamp = get_ISO_time() + print('ToolExecution - failure') + pass + # self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": + print("step_complete") if (chunk.event.payload.step_type == "inference"): + print("step_complete inference") llm_event.prompt = [ {"content": message['content'], "role": message['role']} for message in kwargs["messages"] ] @@ -130,6 +151,7 @@ def handle_stream_agent(chunk: dict): llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) elif (chunk.event.payload.step_type == "tool_execution"): + print('ToolExecution - complete') tool_event.name = "ToolExecution - complete" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) @@ -146,7 +168,6 @@ def handle_stream_agent(chunk: dict): f"chunk:\n {chunk}\n" f"kwargs:\n {kwargs_str}\n" ) - if kwargs.get("stream", False): def generator(): for chunk in response: @@ -159,6 +180,12 @@ async def async_generator(): handle_stream_agent(chunk) yield chunk return async_generator() + elif inspect.isgenerator(response): + async def async_generator(): + async for chunk in response: + handle_stream_agent(chunk) + yield chunk + return async_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: @@ -215,6 +242,7 @@ def patched_function(*args, **kwargs): session = kwargs.get("session", None) if "session" in kwargs.keys(): del kwargs["session"] + result = self.original_create_turn(*args, **kwargs) return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) @@ -223,6 +251,7 @@ def patched_function(*args, **kwargs): def override(self): + print("_!_!_ override _!_!_") self._override_complete() self._override_create_turn() diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 3e88eea99..784f853ee 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -121,3 +121,4 @@ docker-compose -f docker.compose.yaml up - download https://ollama.com/ - https://www.llama.com/docs/getting_the_models/meta/ - https://llama-stack.readthedocs.io/en/latest/getting_started/index.html +- 
https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb index f790aac6b..384290cc8 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb @@ -17,21 +17,14 @@ }, { "cell_type": "code", - "execution_count": 64, + "execution_count": 1, "metadata": {}, "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "44374.39s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.55)\n", + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", @@ -63,20 +56,61 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "44382.44s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", + "Requirement already satisfied: blobfile in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.0)\n", + "Requirement already satisfied: fire in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.7.0)\n", + "Requirement already satisfied: httpx in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.27.2)\n", + "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.26.3)\n", + "Requirement already satisfied: llama-models>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", + "Requirement already satisfied: llama-stack-client>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", + "Requirement already satisfied: prompt-toolkit in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.48)\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (1.0.1)\n", + "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.10.1)\n", + "Requirement already satisfied: requests in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.32.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (13.9.4)\n", + "Requirement already satisfied: setuptools in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (75.6.0)\n", + "Requirement already satisfied: termcolor in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.5.0)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (6.0.2)\n", + "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (3.1.4)\n", + "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (0.8.0)\n", + "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (11.0.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.9.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (2.2.3)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (24.9.0)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.12.2)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (1.0.7)\n", + "Requirement already satisfied: idna in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (3.10)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", + "Requirement already satisfied: pycryptodomex>=3.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.21.0)\n", + "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (2.2.3)\n", + "Requirement already satisfied: lxml>=4.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (5.3.0)\n", + "Requirement already satisfied: filelock>=3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.16.1)\n", + "Requirement already satisfied: fsspec>=2023.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (2024.10.0)\n", + "Requirement already satisfied: packaging>=20.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (23.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests->llama-stack) (3.4.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.56->llama-stack) (3.0.2)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", + "Requirement already satisfied: regex>=2022.1.18 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.56->llama-stack) (2024.11.6)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.56->llama-stack) (1.16.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", @@ -90,32 +124,34 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "44389.50s - pydevd: Sending message related to process being replaced timed-out after 5 seconds\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ + "Note: you may need to restart the kernel to use updated packages.\n", "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: fastapi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.115.5)\n", + "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (0.41.3)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (2.10.1)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", + "Requirement already 
satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", + "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.6.2.post1)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } ], "source": [ "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", "%pip install -U agentops\n", - "%pip install -U python-dotenv" + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n" ] }, { @@ -127,10 +163,11 @@ }, { "cell_type": "code", - "execution_count": 65, + "execution_count": 1, "metadata": {}, "outputs": [], "source": [ + "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client.lib.inference.event_logger import EventLogger\n", "from llama_stack_client.types import UserMessage\n", @@ -139,46 +176,11 @@ "from dotenv import load_dotenv\n", "import os\n", "import agentops\n", - "import asyncio" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" - ] - }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [] - }, - { - "cell_type": "code", - "execution_count": 66, - "metadata": {}, - "outputs": [], - "source": [ + "\n", "load_dotenv()\n", - "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"" - ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: AgentOps has already been initialized. 
If you are trying to start a session, call agentops.start_session() instead.\n" - ] - } - ], - "source": [ - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + "\n", + "# agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", "port = 5001 # LLAMA_STACK_PORT\n", @@ -199,14 +201,14 @@ }, { "cell_type": "code", - "execution_count": 71, + "execution_count": 5, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=165f74ba-2f5d-42d3-957f-b6f175dc2471\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5f22f2fd-2561-4b8d-8d8c-1ae875d8075c\u001b[0m\u001b[0m\n" ] }, { @@ -216,17 +218,17 @@ "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[71], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-3B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:201\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, 
**kwargs)\u001b[0m\n\u001b[1;32m 199\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 200\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 201\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 202\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:207\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m 
kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 207\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1261\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1247\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1248\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1249\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1256\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1257\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1258\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1259\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1260\u001b[0m )\n\u001b[0;32m-> 1261\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:953\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 
950\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 951\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 953\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 954\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 955\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File 
\u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1041\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1039\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1040\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1041\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1042\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1043\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1051\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1090\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1086\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1087\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1088\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1090\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1091\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1092\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1056\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1053\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1055\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1056\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1058\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1059\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1060\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1064\u001b[0m 
retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1065\u001b[0m )\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File 
\u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" ] } @@ -240,7 +242,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=False\n", ")\n", "\n", @@ -257,43 +259,31 @@ }, { "cell_type": "code", - 
"execution_count": 68, + "execution_count": 6, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5e22565f-ce52-4eba-9de7-65898f52afc1\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33m\"C\u001b[0m\u001b[33mele\u001b[0m\u001b[33mstial\u001b[0m\u001b[33m beacon\u001b[0m\u001b[33m shines\u001b[0m\u001b[33m\"\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ - "\u001b[97m\u001b[0m\n" + "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", + "\n", + "\u001b[0m\u001b[33m\"L\u001b[0m\u001b[33munar\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Could not end session - multiple sessions detected. You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.1s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" ] } ], @@ -306,7 +296,7 @@ " role=\"user\",\n", " ),\n", " ],\n", - " model_id=\"meta-llama/Llama-3.2-3B-Instruct\",\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", " stream=True\n", ")\n", "\n", @@ -318,34 +308,22 @@ }, { "cell_type": "code", - "execution_count": 54, + "execution_count": null, "metadata": {}, "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "_!_!_ LlamaStackClientProvider _!_!_\n", + "_!_!_ override _!_!_\n" + ] + }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", - "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", - "response:\n", - " \n", - "kwargs:\n", - " {'messages': [{'content': 'Hello', 'role': 'user'}],\n", - " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", - "\n", - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. 
Start a session by calling agentops.start_session().\u001b[0m\n", - "🖇 AgentOps: Unable to parse response for LLM call. Skipping upload to AgentOps\n", - "response:\n", - " \n", - "kwargs:\n", - " {'messages': [{'content': 'Which players played in the winning team of the NBA '\n", - " 'western conference semifinals of 2024, please use '\n", - " 'tools',\n", - " 'role': 'user'}],\n", - " 'session_id': '37065665-9eba-49f1-bc6f-d616ff320e8d'}\n", - "\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" ] }, { @@ -354,8 +332,16 @@ "text": [ "No available shields. Disable safety.\n", "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "Response: \n", - "Response: \n" + "response=.async_generator at 0x10ee067a0>\n", + "response=.async_generator at 0x10ee70900>\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" ] } ], @@ -366,6 +352,8 @@ "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", "\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", "# Apply nest_asyncio to handle nested event loops\n", "# nest_asyncio.apply()\n", "\n", @@ -376,7 +364,7 @@ "\n", "async def agent_test():\n", " client = LlamaStackClient(\n", - " base_url=f\"http://localhost:{LLAMA_STACK_PORT}\",\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", " )\n", "\n", " available_shields = [shield.identifier for shield in client.shields.list()]\n", @@ -415,7 +403,7 @@ " agent = Agent(client, agent_config)\n", " user_prompts = [\n", " \"Hello\",\n", - " \"Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", " ]\n", "\n", " session_id = agent.create_session(\"test-session\")\n", @@ -431,9 +419,16 @@ " session_id=session_id,\n", " )\n", "\n", - " print(\"Response: \", response)\n", + " print(f\"{response=}\")\n", + "\n", + " # async for log in EventLogger().log(response):\n", + " # log.print()\n", + "\n", + "agentops.start_session()\n", + "\n", + "await agent_test()\n", "\n", - "await agent_test()" + "agentops.end_session(\"Success\")" ] }, { diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index b466f45d0..ea0042d36 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -12,9 +12,9 @@ LLAMA_STACK_PORT = 5001 INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) @@ -60,7 +60,7 @@ async def agent_test(): agent = Agent(client, agent_config) 
user_prompts = [ "Hello", - "Which players played in the winning team of the NBA western conference semifinals of 2024, please use tools", + "Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools", ] session_id = agent.create_session("test-session") From 978d4f055498cd9f82cbf7db21dc88f22425c821 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 19:53:32 -0500 Subject: [PATCH 31/69] saving changes to track Llama Stack Agent events with a stack data structure --- agentops/llms/llama_stack_client.py | 123 +++++++++++++++++----------- 1 file changed, 74 insertions(+), 49 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index db19a2ebc..3e94401fa 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -15,35 +15,35 @@ class LlamaStackClientProvider(InstrumentedProvider): def __init__(self, client): - print("_!_!_ LlamaStackClientProvider _!_!_") super().__init__(client) self._provider_name = "LlamaStack" def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: """Handle responses for LlamaStack""" try: + stack = [] accum_delta = None accum_tool_delta = None - tool_event = None - llm_event = None + # tool_event = None + # llm_event = None def handle_stream_chunk(chunk: dict): # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - llm_event.session_id = session.session_id + # if session is not None: + # llm_event.session_id = session.session_id # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - if llm_event.returns is None: - llm_event.returns = chunk.event + # if llm_event.returns is None: + # llm_event.returns = chunk.event try: nonlocal accum_delta - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = kwargs["model_id"] - llm_event.prompt = kwargs["messages"] + # llm_event.agent_id = check_call_stack_for_agent_id() + # llm_event.model = kwargs["model_id"] + # llm_event.prompt = kwargs["messages"] # NOTE: We assume for completion only choices[0] is relevant # chunk.event @@ -79,44 +79,53 @@ def handle_stream_agent(chunk: dict): # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - nonlocal llm_event + # nonlocal llm_event + nonlocal stack if session is not None: llm_event.session_id = session.session_id - if getattr(llm_event, 'returns', None): - llm_event.returns = chunk.event + # if getattr(llm_event, 'returns', None): + # llm_event.returns = chunk.event try: if chunk.event.payload.event_type == "turn_start": - pass + print("turn_start") + stack.append({ + 'event_type': chunk.event.payload.event_type, + 'event': None + }) elif chunk.event.payload.event_type == "step_start": print("step_start") - llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - pass + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({ + 'event_type': chunk.event.payload.event_type, + 'event': llm_event + }) elif chunk.event.payload.event_type == "step_progress": - if 
(chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.prompt = kwargs["messages"] + # llm_event.agent_id = check_call_stack_for_agent_id() + # llm_event.prompt = kwargs["messages"] if accum_delta: accum_delta += delta else: accum_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): - + elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): if (chunk.event.payload.tool_call_delta.parse_status == "started"): - print('ToolExecution - started') - nonlocal tool_event + print('tool_started') tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) + tool_event.name = "tool_started" + + stack.append({ + "event_type": "tool_started", + "event": tool_event + }) - tool_event.name = "ToolExecution - started" - tool_event.init_timestamp = get_ISO_time() # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - print('ToolExecution - progress') + # print('ToolExecution - in_progress') nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: @@ -125,40 +134,57 @@ def handle_stream_agent(chunk: dict): accum_tool_delta = delta elif (chunk.event.payload.tool_call_delta.parse_status == "success"): print('ToolExecution - success') - tool_event.name = "ToolExecution - success" - tool_event.params["completion"] = accum_tool_delta - tool_event.end_timestamp = get_ISO_time() - # self._safe_record(session, tool_event) + if stack[-1]['event_type'] == "tool_started": # check if the last event in the stack is a tool execution event + + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + # tool_event.name = "ToolExecution - success" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - tool_event.name = "ToolExecution - failure" - tool_event.end_timestamp = get_ISO_time() print('ToolExecution - failure') - pass - # self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + if stack[-1]['event_type'] == "ToolExecution - started": + tool_event = stack.pop().get("event") + tool_event.end_timestamp = get_ISO_time() + # tool_event.name = "ToolExecution - failure" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": print("step_complete") if (chunk.event.payload.step_type == "inference"): + print("step_complete inference") - llm_event.prompt = [ - {"content": message['content'], "role": message['role']} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata.get("model_id", "Unable to identify model") - llm_event.prompt_tokens = None - llm_event.completion = accum_delta or kwargs["completion"] - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) + if stack[-1]['event_type'] == "step_start": # check if the last event in the stack is a step start event + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": 
message['content'], "role": message['role']} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion = accum_delta or kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) elif (chunk.event.payload.step_type == "tool_execution"): - print('ToolExecution - complete') - tool_event.name = "ToolExecution - complete" - tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) + if stack[-1]['event_type'] == "tool_started": + print('tool_complete') + tool_event = stack.pop().get("event") + tool_event.name = "tool_complete" + tool_event.params["completion"] = accum_tool_delta + self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": + if stack[-1]['event_type'] == "turn_start": + print('turn_start') + # llm_event = stack.pop() + # llm_event.end_timestamp = get_ISO_time() + # self._safe_record(session, llm_event) pass except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) + self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) kwargs_str = pprint.pformat(kwargs) @@ -251,7 +277,6 @@ def patched_function(*args, **kwargs): def override(self): - print("_!_!_ override _!_!_") self._override_complete() self._override_create_turn() From d23564341b4626d355531b4982171965d120ddcb Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 19:59:34 -0500 Subject: [PATCH 32/69] removing commented code --- agentops/llms/llama_stack_client.py | 2 -- 1 file changed, 2 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 3e94401fa..688026e64 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -105,8 +105,6 @@ def handle_stream_agent(chunk: dict): if (chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response - # llm_event.agent_id = check_call_stack_for_agent_id() - # llm_event.prompt = kwargs["messages"] if accum_delta: accum_delta += delta From 3ee63ccc7b06c48ed715e63cdf49c4df5827e55a Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 20:31:03 -0500 Subject: [PATCH 33/69] tweak handle_stream_chunk in handle_response function of Llama Stack LLM provider to use a stack data structure --- agentops/llms/llama_stack_client.py | 45 ++++++++++++------- .../inference_canary.py | 6 +-- 2 files changed, 32 insertions(+), 19 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 688026e64..9dfb604dd 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -28,11 +28,8 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se # llm_event = None def handle_stream_chunk(chunk: dict): - # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - # if session is not None: - # llm_event.session_id = session.session_id + nonlocal stack # NOTE: prompt/completion usage not returned in response when streaming # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks 
to build one full chat completion @@ -45,25 +42,41 @@ def handle_stream_chunk(chunk: dict): # llm_event.model = kwargs["model_id"] # llm_event.prompt = kwargs["messages"] - # NOTE: We assume for completion only choices[0] is relevant - # chunk.event - if chunk.event.event_type == "start": + llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) + stack.append({ + 'event_type': "start", + 'event': llm_event + }) accum_delta = chunk.event.delta elif chunk.event.event_type == "progress": accum_delta += chunk.event.delta elif chunk.event.event_type == "complete": - llm_event.prompt = [ - {"content": message.content, "role": message.role} for message in kwargs["messages"] - ] - llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.prompt_tokens = None - llm_event.completion = accum_delta - llm_event.completion_tokens = None - llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, llm_event) + if stack[-1]['event_type'] == "start": # check if the last event in the stack is a step start event + llm_event = stack.pop().get("event") + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] + llm_event.agent_id = check_call_stack_for_agent_id() + llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.prompt_tokens = None + llm_event.completion = accum_delta or kwargs["completion"] + llm_event.completion_tokens = None + llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, llm_event) + + # llm_event.prompt = [ + # {"content": message.content, "role": message.role} for message in kwargs["messages"] + # ] + # llm_event.agent_id = check_call_stack_for_agent_id() + # llm_event.prompt_tokens = None + # llm_event.completion = accum_delta + # llm_event.completion_tokens = None + # llm_event.end_timestamp = get_ISO_time() + # self._safe_record(session, llm_event) except Exception as e: + llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) kwargs_str = pprint.pformat(kwargs) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py index 38dec66cc..02a86e914 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py @@ -11,9 +11,9 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT From 44494e13fb824843b00c72511cf78495cfc2ebfe Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 20:37:41 -0500 Subject: [PATCH 34/69] removing comments --- agentops/llms/llama_stack_client.py | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 9dfb604dd..226fc9506 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -65,16 +65,6 @@ def handle_stream_chunk(chunk: dict): llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) - # llm_event.prompt = [ - # {"content": message.content, "role": message.role} for message in 
kwargs["messages"] - # ] - # llm_event.agent_id = check_call_stack_for_agent_id() - # llm_event.prompt_tokens = None - # llm_event.completion = accum_delta - # llm_event.completion_tokens = None - # llm_event.end_timestamp = get_ISO_time() - # self._safe_record(session, llm_event) - except Exception as e: llm_event = LLMEvent(init_timestamp=init_timestamp, end_timestamp=get_ISO_time(), params=kwargs) self._safe_record(session, ErrorEvent(trigger_event=llm_event, exception=e)) From bcf22a8b8853cd49764db8105fb8206277899c63 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 21:10:33 -0500 Subject: [PATCH 35/69] inference_canary 1 and 2 now for clarity --- .../llama_stack_client_examples/README.md | 1 + ...erence_canary.py => inference_canary_1.py} | 13 ----- .../inference_canary_2.py | 57 +++++++++++++++++++ 3 files changed, 58 insertions(+), 13 deletions(-) rename tests/core_manual_tests/providers/llama_stack_client_canary/{inference_canary.py => inference_canary_1.py} (78%) create mode 100644 tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 784f853ee..9aeb16426 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -111,6 +111,7 @@ docker-compose -f docker.compose.yaml up - `ollama list` - `ollama help` - `ollama ps` +- `tail -f ~/.ollama/logs/server.log` ## Reference links used during development diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py similarity index 78% rename from tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py rename to tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index 02a86e914..3a1e95a20 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -1,5 +1,4 @@ import asyncio - import agentops import os from dotenv import load_dotenv @@ -42,18 +41,6 @@ async def stream_test(): def main(): agentops.start_session() - - # client.inference.chat_completion( - # messages=[ - # UserMessage( - # content="hello world, write me a 3 word poem about the moon", - # role="user", - # ), - # ], - # model_id="meta-llama/Llama-3.2-1B-Instruct", - # stream=False, - # ) - asyncio.run(stream_test()) agentops.end_session(end_state="Success") diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py new file mode 100644 index 000000000..ee1bcd9e7 --- /dev/null +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -0,0 +1,57 @@ +import agentops +import os +from dotenv import load_dotenv +from llama_stack_client import LlamaStackClient +from llama_stack_client.types import UserMessage +from llama_stack_client.lib.inference.event_logger import EventLogger + +load_dotenv() + +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) + +import debugpy +debugpy.listen(5678) +debugpy.wait_for_client() + +host = "0.0.0.0" # LLAMA_STACK_HOST +port = 5001 # LLAMA_STACK_PORT + +full_host = f"http://{host}:{port}" + +client = LlamaStackClient( + 
base_url=f"{full_host}", +) + +async def stream_test(): + response = client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=True, + ) + + async for log in EventLogger().log(response): + log.print() + + +def main(): + agentops.start_session() + + client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, + ) + + agentops.end_session(end_state="Success") + +main() From a4fec782f56b547778b247ac04249ca1fd7da906 Mon Sep 17 00:00:00 2001 From: tad dy Date: Mon, 2 Dec 2024 21:11:32 -0500 Subject: [PATCH 36/69] organizing canaries --- .../llama_stack_client_canary/inference_canary_1.py | 6 +++--- .../llama_stack_client_canary/inference_canary_2.py | 6 +++--- 2 files changed, 6 insertions(+), 6 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index 3a1e95a20..66717f898 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -10,9 +10,9 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index ee1bcd9e7..f18db96bf 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -9,9 +9,9 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT From 7319616be71e13fff64bd514afdcc659deb0f9ff Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 05:09:18 -0600 Subject: [PATCH 37/69] not a big deal --- agentops/llms/llama_stack_client.py | 29 ++-- .../llama_stack_client_examples/README.md | 153 ++++++------------ ...ks.compose.yaml => compose.fireworks.yaml} | 4 +- .../{llama_stack_ollama => }/compose.yaml | 45 ++++-- .../docker.compose.yaml | 85 ---------- ...rver-config.yaml => fireworks.config.yaml} | 0 .../llama-stack-server-config.yaml | 54 ------- .../llama_stack_fireworks/README.fireworks.md | 16 -- .../llama_stack_ollama/README.md | 4 - ...k_client_examples.ipynb => notebook.ipynb} | 10 ++ ...ith-safety.yaml => run-safety-shield.yaml} | 0 .../{llama_stack_ollama => }/run.yaml | 3 +- 12 files changed, 105 insertions(+), 298 deletions(-) rename examples/llama_stack_client_examples/{llama_stack_fireworks/fireworks.compose.yaml => compose.fireworks.yaml} (75%) rename examples/llama_stack_client_examples/{llama_stack_ollama => }/compose.yaml (56%) delete mode 100644 
examples/llama_stack_client_examples/docker.compose.yaml rename examples/llama_stack_client_examples/{llama_stack_fireworks/fireworks-server-config.yaml => fireworks.config.yaml} (100%) delete mode 100644 examples/llama_stack_client_examples/llama-stack-server-config.yaml delete mode 100644 examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md delete mode 100644 examples/llama_stack_client_examples/llama_stack_ollama/README.md rename examples/llama_stack_client_examples/{llama_stack_client_examples.ipynb => notebook.ipynb} (99%) rename examples/llama_stack_client_examples/{llama_stack_ollama/run-with-safety.yaml => run-safety-shield.yaml} (100%) rename examples/llama_stack_client_examples/{llama_stack_ollama => }/run.yaml (96%) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 226fc9506..e5c2c1559 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -1,7 +1,9 @@ import inspect import pprint import sys -from typing import Any, AsyncGenerator, Dict, Optional +from typing import Any, AsyncGenerator, Dict, Optional, List +import logging +from typing import Union from agentops.event import LLMEvent, ErrorEvent, ToolEvent from agentops.session import Session @@ -92,13 +94,13 @@ def handle_stream_agent(chunk: dict): # llm_event.returns = chunk.event try: if chunk.event.payload.event_type == "turn_start": - print("turn_start") + logger.debug("turn_start") stack.append({ 'event_type': chunk.event.payload.event_type, 'event': None }) elif chunk.event.payload.event_type == "step_start": - print("step_start") + logger.debug("step_start") llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) stack.append({ 'event_type': chunk.event.payload.event_type, @@ -115,7 +117,7 @@ def handle_stream_agent(chunk: dict): accum_delta = delta elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): if (chunk.event.payload.tool_call_delta.parse_status == "started"): - print('tool_started') + logger.debug('tool_started') tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) tool_event.name = "tool_started" @@ -134,7 +136,7 @@ def handle_stream_agent(chunk: dict): else: accum_tool_delta = delta elif (chunk.event.payload.tool_call_delta.parse_status == "success"): - print('ToolExecution - success') + logger.debug('ToolExecution - success') if stack[-1]['event_type'] == "tool_started": # check if the last event in the stack is a tool execution event tool_event = stack.pop().get("event") @@ -143,7 +145,7 @@ def handle_stream_agent(chunk: dict): tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - print('ToolExecution - failure') + logger.warning('ToolExecution - failure') if stack[-1]['event_type'] == "ToolExecution - started": tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() @@ -152,11 +154,12 @@ def handle_stream_agent(chunk: dict): self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) elif chunk.event.payload.event_type == "step_complete": - print("step_complete") + logger.debug("Step complete event received") + if (chunk.event.payload.step_type == "inference"): - - print("step_complete inference") - if stack[-1]['event_type'] == "step_start": # check if the last event in the stack is a step start event + logger.debug("Step complete inference") + + if 
stack[-1]['event_type'] == "step_start": llm_event = stack.pop().get("event") llm_event.prompt = [ {"content": message['content'], "role": message['role']} for message in kwargs["messages"] @@ -168,16 +171,18 @@ def handle_stream_agent(chunk: dict): llm_event.completion_tokens = None llm_event.end_timestamp = get_ISO_time() self._safe_record(session, llm_event) + else: + logger.warning("Unexpected event stack state for inference step complete") elif (chunk.event.payload.step_type == "tool_execution"): if stack[-1]['event_type'] == "tool_started": - print('tool_complete') + logger.debug('tool_complete') tool_event = stack.pop().get("event") tool_event.name = "tool_complete" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": if stack[-1]['event_type'] == "turn_start": - print('turn_start') + logger.debug('turn_start') # llm_event = stack.pop() # llm_event.end_timestamp = get_ISO_time() # self._safe_record(session, llm_event) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 9aeb16426..9c6a4ca40 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -1,125 +1,66 @@ -# TLDR +# Llama Stack Client Examples -How to set up a Llama Stack server for supporting the `llama_stack_client_example.ipynb` examples +Run Llama Stack with Ollama - either local or containerized. -## Disclaimer +## Quick Start -As of 11/2024, Llama Stack is new and is subject to breaking changes. -Here are Llama Stack's docs: https://llama-stack.readthedocs.io/en/latest/ +Just run: -## ToC - -1. Running the Ollama Server and Llama Stack Server on the Host - - a) Download, install, & start Ollama - - b) Start the Llama Stack Server - - c) Call the Llama Stack Server with a Llama Stack Client -2. Running the Ollama Server in a Docker Container - -## Running the Ollama Server and Llama Stack Server on the Host - -### 1a - Download, install, & start Ollama - -https://ollama.com/ - -Ollama has an easy-to-use installer available for macOS, Linux, and Windows. - -```sh -export OLLAMA_INFERENCE_MODEL="llama3.2:1b-instruct-fp16" -ollama run $OLLAMA_INFERENCE_MODEL --keepalive 60m -ollama run llama3.2:1b --keepalive 60m -``` - -### 1b - Start the Llama Stack server - -You need to configure the Llama Stack server with a yaml config ie: peep the `llama-stack-server-config.yaml` file. FYI, found this config here: `https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml` - -```sh -export LLAMA_STACK_PORT=5001 -export INFERENCE_MODEL="meta-llama/Llama-3.2-1B-Instruct" -docker run \ - -it \ - -p $LLAMA_STACK_PORT:$LLAMA_STACK_PORT \ - -v ~/.llama:/root/.llama \ - -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ - llamastack/distribution-ollama \ - --yaml-config /root/my-run.yaml \ - --port $LLAMA_STACK_PORT \ - --env INFERENCE_MODEL=$INFERENCE_MODEL \ - --env OLLAMA_URL=http://host.docker.internal:11434 +```bash +./start-stack.sh ``` -```sh -docker run \ - -it \ - -p 5001:5001 \ - -v ~/.llama:/root/.llama \ - -v ./examples/llama_stack_client_examples/llama-stack-server-config.yaml:/root/my-run.yaml \ - llamastack/distribution-ollama \ - --yaml-config /root/my-run.yaml \ - --port 5001 \ - --env INFERENCE_MODEL=meta-llama/Llama-3.2-1B \ - --env OLLAMA_URL=http://host.docker.internal:11434 -``` +The script will: +1. 
Check if Ollama is already running locally +2. Check if Llama Stack server is already running +3. Guide you through what needs to be started +Example outputs: -### 1c - Call the Llama Stack Server with a Llama Stack Client +```bash +# Scenario 1: Ollama running locally +✓ Ollama server is running locally +✗ No Llama Stack server detected +Start Llama Stack server? [Y/n] -ie: Check out the examples in the `llama_stack_client_examples.ipynb` file - -## Running the Ollama Server in a Docker Container - -```sh - set up the ollama server -docker-compose -f docker.compose.yaml up +# Scenario 2: Nothing running +✗ No local Ollama server detected +✗ No Llama Stack server detected +No Ollama server detected. Start both Ollama and Llama Stack? [Y/n] ``` -```sh - download a model -curl -X POST http://localhost:11434/api/pull -d '{"model": "llama3.2:1b"}' -``` +## Environment Variables -```sh - test the model -curl http://localhost:11434/api/generate -d '{ - "model": "llama3.2:1b", - "prompt": "Why is the sky blue?" -}' - -curl http://localhost:11434/api/chat -d '{ - "model": "llama3.2:1b", - "messages": [ - { - "role": "user", - "content": "why is the sky blue?" - } - ], - "stream": false -}' -``` +| Variable | Description | Default | +|----------|-------------|---------| +| `LLAMA_STACK_PORT` | Server port | 5001 | +| `INFERENCE_MODEL` | Model ID | meta-llama/Llama-3.2-3B-Instruct | +| `SAFETY_MODEL` | Optional safety model | - | +| `NETWORK_MODE` | Docker network mode | auto-configured | +| `OLLAMA_URL` | Ollama API URL | auto-configured | -## 2 - Running the Ollama Server in a Docker Container +## Notes -```sh -docker-compose -f docker.compose.yaml up +``` +llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list +┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ +┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ +┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ +│ meta-llama/Llama-3.2-3B-Instruct │ ollama │ llama3.2:3b-instruct-fp16 │ │ +└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ ``` -## Common Gotchas - -1. Models contain different id's w.r.t. Ollama and Llama Stack. For example, Ollama refers to `Llama-3.2-3B-Instruct` as `llama3.2:1b-instruct-fp16` whereas Llama Stack refers to it as `meta-llama/Llama-3.2-3B-Instruct` - -2. Docker will likely need more system memory resources allocated to it - -## Useful ollama commands +2. Docker needs sufficient memory allocation -- `ollama list` -- `ollama help` -- `ollama ps` -- `tail -f ~/.ollama/logs/server.log` +3. 
Ollama commands: + ```bash + ollama list + ollama help + ollama ps + ``` -## Reference links used during development +## References -- https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml -- https://llama-stack.readthedocs.io -- https://github.com/meta-llama/llama-stack-client-python -- https://github.com/meta-llama/llama-stack -- download https://ollama.com/ -- https://www.llama.com/docs/getting_the_models/meta/ -- https://llama-stack.readthedocs.io/en/latest/getting_started/index.html -- https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py \ No newline at end of file +- [Llama Stack Fireworks](./llama_stack_fireworks/README.fireworks.md) +- [Llama Stack Docs](https://llama-stack.readthedocs.io) +- [Ollama](https://ollama.com/) diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml b/examples/llama_stack_client_examples/compose.fireworks.yaml similarity index 75% rename from examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml rename to examples/llama_stack_client_examples/compose.fireworks.yaml index fcac78a29..4eb5aff0b 100644 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks.compose.yaml +++ b/examples/llama_stack_client_examples/compose.fireworks.yaml @@ -4,10 +4,10 @@ services: network_mode: "host" volumes: - ~/.llama:/root/.llama - - ./run.yaml:/root/llamastack-run-fireworks.yaml + - ./run.yaml:/root/run.yaml ports: - "5000:5000" - entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/llamastack-run-fireworks.yaml" + entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/run.yaml" deploy: restart_policy: condition: on-failure diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml b/examples/llama_stack_client_examples/compose.yaml similarity index 56% rename from examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml rename to examples/llama_stack_client_examples/compose.yaml index 9eff1970d..d2add1198 100644 --- a/examples/llama_stack_client_examples/llama_stack_ollama/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -1,7 +1,6 @@ services: ollama: image: ollama/ollama:latest - container_name: ollama network_mode: ${NETWORK_MODE:-bridge} volumes: - ~/.ollama:/root/.ollama @@ -16,28 +15,28 @@ services: memory: 8G # Set maximum memory reservations: memory: 8G # Set minimum memory reservation - # healthcheck: - # # ugh, no CURL in ollama image - # test: ["CMD", "curl", "-f", "http://ollama:11434"] - # interval: 10s - # timeout: 5s - # retries: 5 + healthcheck: + # ̶u̶g̶h̶,̶ ̶n̶o̶ ̶C̶U̶R̶L̶ ̶i̶n̶ ̶o̶l̶l̶a̶m̶a̶ ̶i̶m̶a̶g̶e̶ + # - fine + test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: ollama\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect ollama:11434 2>/dev/null | grep \"HTTP/1.1 200\""] + interval: 10s + timeout: 5s + retries: 5 ollama-init: image: ollama/ollama:latest depends_on: - - ollama - # condition: service_healthy + ollama: + condition: service_healthy network_mode: ${NETWORK_MODE:-bridge} - container_name: ollama-init environment: - OLLAMA_HOST=ollama - INFERENCE_MODEL=${INFERENCE_MODEL} - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama - - ./pull-models.sh:/root/pull-models.sh - entrypoint: ["/root/pull-models.sh"] + - ./pull-models.sh:/pull-models.sh + entrypoint: ["/pull-models.sh"] llamastack: depends_on: @@ -51,7 +50,7 @@ services: - 
~/.llama:/root/.llama # Link to ollama run.yaml file - ~/local/llama-stack/:/app/llama-stack-source - - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/my-run.yaml + - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/run.yaml ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" environment: @@ -59,15 +58,27 @@ services: - SAFETY_MODEL=${SAFETY_MODEL:-} - OLLAMA_URL=http://ollama:11434 entrypoint: > - python -m llama_stack.distribution.server.server /root/my-run.yaml \ - --port ${LLAMA_STACK_PORT:-5001} + python -m llama_stack.distribution.server.server --yaml-config /root/run.yaml --port ${LLAMA_STACK_PORT:-5001} deploy: restart_policy: condition: on-failure delay: 10s max_attempts: 3 window: 60s + notebook: + image: python:3.12 + depends_on: + llamastack: + condition: service_started + network_mode: ${NETWORK_MODE:-bridge} + volumes: + - ./notebook.ipynb:/app/notebook.ipynb + command: > + bash -c "pip install llama-stack-client jupyter nbconvert && + jupyter nbconvert --to python /app/notebook.ipynb && + python /app/notebook.py" + restart: "no" + volumes: - ollama: ollama-init: - llamastack: \ No newline at end of file + llamastack: diff --git a/examples/llama_stack_client_examples/docker.compose.yaml b/examples/llama_stack_client_examples/docker.compose.yaml deleted file mode 100644 index 76684f37b..000000000 --- a/examples/llama_stack_client_examples/docker.compose.yaml +++ /dev/null @@ -1,85 +0,0 @@ -services: - # Ollama server service - ollama: - image: ollama/ollama:latest - container_name: ollama_server - ports: - - "11434:11434" # Map Ollama's port to host - environment: - OLLAMA_DEBUG: 1 - volumes: - - ~/.ollama/models:/root/.ollama # Persist data (e.g., downloaded models) - deploy: - resources: - limits: - memory: 16G # Set maximum memory - reservations: - memory: 12G # Set minimum memory reservation - entrypoint: ["ollama", "serve"] # Start the Ollama server - restart: always # Ensure Ollama server restarts on failure - healthcheck: - # ugh, no CURL in ollama image - test: ["CMD", "curl", "-f", "http://ollama:11434"] - interval: 10s - timeout: 5s - retries: 5 - - # Ephemeral service to trigger model download - # model_downloader: - # image: curlimages/curl:latest # Use a lightweight image with curl - # depends_on: - # - ollama # Ensure the Ollama server starts first - # entrypoint: > - # sh -c "sleep 5 && - # curl -X POST http://ollama:11434/api/pull -d '{\"model\": \"llama3.2:3b-instruct-fp16\"}'" - # restart: "no" # Ensure this service doesn't restart - - ollama-init: - image: ollama/ollama:latest - depends_on: - ollama: - condition: service_started - network_mode: bridge - container_name: ollama-init - environment: - - OLLAMA_HOST=host.docker.internal - - INFERENCE_MODEL=llama3.2:3b-instruct-fp16 - volumes: - - ~/.ollama:/root/.ollama - - ./pull-models.sh:/root/pull-models.sh - entrypoint: ["/root/pull-models.sh"] - - - # tester: - # image: curlimages/curl:latest # Use a lightweight image with curl - # depends_on: - # - model_downloader # Ensure the Ollama server starts first - # entrypoint: > - # sh -c "sleep 5 && - # curl -X POST http://ollama:11434/api/generate -d '{\"model\": \"llama3.2:3b-instruct-fp16\",\"prompt\": \"Say 3 words\"}'" - # restart: "no" # Ensure this service doesn't restart - - llama-stack: - depends_on: - ollama: - condition: service_started - ollama-init: - condition: service_started - image: llamastack/distribution-ollama - container_name: llama_stack_server - ports: - - "5001:5001" - volumes: - - "~/.ollama/models:/root/.ollama" - - 
"./llama-stack-server-config.yaml:/root/my-run.yaml" - environment: - - INFERENCE_MODEL=meta-llama/Llama-3.2-3B-Instruct - - OLLAMA_URL=http://ollama:11434 - command: > - --yaml-config /root/my-run.yaml - --port 5001 - platform: linux/amd64 - -networks: - default: - driver: bridge diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml b/examples/llama_stack_client_examples/fireworks.config.yaml similarity index 100% rename from examples/llama_stack_client_examples/llama_stack_fireworks/fireworks-server-config.yaml rename to examples/llama_stack_client_examples/fireworks.config.yaml diff --git a/examples/llama_stack_client_examples/llama-stack-server-config.yaml b/examples/llama_stack_client_examples/llama-stack-server-config.yaml deleted file mode 100644 index c51a454eb..000000000 --- a/examples/llama_stack_client_examples/llama-stack-server-config.yaml +++ /dev/null @@ -1,54 +0,0 @@ -version: '2' -image_name: ollama -docker_image: null -conda_env: ollama -apis: -- agents -- inference -- memory -- safety -- telemetry -providers: - inference: - - provider_id: ollama - provider_type: remote::ollama - config: - url: ${env.OLLAMA_URL} - memory: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/agents_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/ollama}/registry.db -models: -- metadata: {} - model_id: ${env.INFERENCE_MODEL} - provider_id: ollama - provider_model_id: null -shields: [] -memory_banks: [] -datasets: [] -scoring_fns: [] -eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md b/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md deleted file mode 100644 index 75c10f74b..000000000 --- a/examples/llama_stack_client_examples/llama_stack_fireworks/README.fireworks.md +++ /dev/null @@ -1,16 +0,0 @@ -# TLDR - -Here are the links of where to find Fireworks integration examples in the Llama Stack repository - -*Disclaimer: This Llama Stack Server + Fireworks setup has been tested but NOT shown to ever work end-2-end* - -## - -https://github.com/meta-llama/llama-stack/blob/main/distributions/fireworks/run.yaml -https://github.com/meta-llama/llama-stack/tree/main/llama_stack/templates/fireworks - -## - -```sh -docker-compose -f fireworks.compose.yaml up -``` diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/README.md b/examples/llama_stack_client_examples/llama_stack_ollama/README.md deleted file mode 100644 index 751820d8a..000000000 --- a/examples/llama_stack_client_examples/llama_stack_ollama/README.md +++ /dev/null @@ -1,4 +0,0 @@ - -chmod +x pull_models.sh - -docker-compose -f compose.yaml up \ No newline at end of file diff --git a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb b/examples/llama_stack_client_examples/notebook.ipynb similarity index 99% rename from 
examples/llama_stack_client_examples/llama_stack_client_examples.ipynb rename to examples/llama_stack_client_examples/notebook.ipynb index 384290cc8..f2b2fc5c8 100644 --- a/examples/llama_stack_client_examples/llama_stack_client_examples.ipynb +++ b/examples/llama_stack_client_examples/notebook.ipynb @@ -15,6 +15,16 @@ "First let's install the required packages" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "python -m venv .venv\n", + "source .venv/bin/activate" + ] + }, { "cell_type": "code", "execution_count": 1, diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml b/examples/llama_stack_client_examples/run-safety-shield.yaml similarity index 100% rename from examples/llama_stack_client_examples/llama_stack_ollama/run-with-safety.yaml rename to examples/llama_stack_client_examples/run-safety-shield.yaml diff --git a/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml b/examples/llama_stack_client_examples/run.yaml similarity index 96% rename from examples/llama_stack_client_examples/llama_stack_ollama/run.yaml rename to examples/llama_stack_client_examples/run.yaml index 32137fd67..63729c0a1 100644 --- a/examples/llama_stack_client_examples/llama_stack_ollama/run.yaml +++ b/examples/llama_stack_client_examples/run.yaml @@ -46,9 +46,8 @@ models: - metadata: {} model_id: ${env.INFERENCE_MODEL} provider_id: ollama - provider_model_id: null shields: [] memory_banks: [] datasets: [] scoring_fns: [] -eval_tasks: [] \ No newline at end of file +eval_tasks: [] From 034e25c25ce259ed7b40af475f0bf460686180e5 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 10:30:47 -0600 Subject: [PATCH 38/69] readme Signed-off-by: Teo --- .../llama_stack_client_examples/README.md | 21 +------------------ 1 file changed, 1 insertion(+), 20 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 9c6a4ca40..54efd34f4 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -7,26 +7,7 @@ Run Llama Stack with Ollama - either local or containerized. Just run: ```bash -./start-stack.sh -``` - -The script will: -1. Check if Ollama is already running locally -2. Check if Llama Stack server is already running -3. Guide you through what needs to be started - -Example outputs: - -```bash -# Scenario 1: Ollama running locally -✓ Ollama server is running locally -✗ No Llama Stack server detected -Start Llama Stack server? [Y/n] - -# Scenario 2: Nothing running -✗ No local Ollama server detected -✗ No Llama Stack server detected -No Ollama server detected. Start both Ollama and Llama Stack? 
[Y/n] +docker compose up ``` ## Environment Variables From 796b6bc004f9b2fb4fc53eba0358e00acd9557f9 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 10:37:01 -0600 Subject: [PATCH 39/69] maintain filename standards under /root Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index d2add1198..3a17e9340 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -35,8 +35,8 @@ services: - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama - - ./pull-models.sh:/pull-models.sh - entrypoint: ["/pull-models.sh"] + - ./pull-models.sh:/root/pull-models.sh + entrypoint: ["/root/pull-models.sh"] llamastack: depends_on: From 3a8ca517cc0ec268a660545ccc5dbb4442ce294a Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 10:45:42 -0600 Subject: [PATCH 40/69] ollama: healthcheck on localhost rather; healthcheck relaxed Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 13 +++++-------- 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 3a17e9340..3f3ea1a32 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -18,16 +18,15 @@ services: healthcheck: # ̶u̶g̶h̶,̶ ̶n̶o̶ ̶C̶U̶R̶L̶ ̶i̶n̶ ̶o̶l̶l̶a̶m̶a̶ ̶i̶m̶a̶g̶e̶ # - fine - test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: ollama\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect ollama:11434 2>/dev/null | grep \"HTTP/1.1 200\""] - interval: 10s + test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect localhost:11434 2>/dev/null | grep \"HTTP/1.1 200\""] + interval: 3s timeout: 5s retries: 5 ollama-init: image: ollama/ollama:latest depends_on: - ollama: - condition: service_healthy + - ollama network_mode: ${NETWORK_MODE:-bridge} environment: - OLLAMA_HOST=ollama @@ -40,10 +39,8 @@ services: llamastack: depends_on: - ollama: - condition: service_started - ollama-init: - condition: service_started + - ollama + - ollama-init image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} network_mode: ${NETWORK_MODE:-bridge} volumes: From 998231eb321795aaf69caa2d8974329da60a3c32 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:02:08 -0600 Subject: [PATCH 41/69] progress, api hitting, network ok Signed-off-by: Teo --- .../llama_stack_client_examples/compose.yaml | 58 ++++++++++--------- 1 file changed, 32 insertions(+), 26 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 3f3ea1a32..5a7bcb78f 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -1,33 +1,32 @@ services: ollama: + hostname: ollama + # extra_hosts: + # - "host.docker.internal:host-gateway" image: ollama/ollama:latest - network_mode: ${NETWORK_MODE:-bridge} volumes: - ~/.ollama:/root/.ollama - ports: - - "11434:11434" environment: OLLAMA_DEBUG: 1 command: [] deploy: resources: limits: - memory: 8G # Set maximum memory + memory: 4G # Set maximum memory reservations: - memory: 8G # Set minimum memory reservation + memory: 2G # Set minimum memory 
reservation healthcheck: - # ̶u̶g̶h̶,̶ ̶n̶o̶ ̶C̶U̶R̶L̶ ̶i̶n̶ ̶o̶l̶l̶a̶m̶a̶ ̶i̶m̶a̶g̶e̶ - # - fine - test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\nConnection: close\\r\\n\\r\\n\" | openssl s_client -connect localhost:11434 2>/dev/null | grep \"HTTP/1.1 200\""] + test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n\" | nc localhost 11434 | grep -q \"HTTP/1.1 200\""] interval: 3s timeout: 5s retries: 5 + networks: + - ollama-network ollama-init: image: ollama/ollama:latest depends_on: - ollama - network_mode: ${NETWORK_MODE:-bridge} environment: - OLLAMA_HOST=ollama - INFERENCE_MODEL=${INFERENCE_MODEL} @@ -36,13 +35,16 @@ services: - ~/.ollama:/root/.ollama - ./pull-models.sh:/root/pull-models.sh entrypoint: ["/root/pull-models.sh"] + networks: + - ollama-network llamastack: depends_on: - - ollama - - ollama-init + ollama: + condition: service_healthy + ollama-init: + condition: service_started image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} - network_mode: ${NETWORK_MODE:-bridge} volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file @@ -62,20 +64,24 @@ services: delay: 10s max_attempts: 3 window: 60s - notebook: - image: python:3.12 - depends_on: - llamastack: - condition: service_started - network_mode: ${NETWORK_MODE:-bridge} - volumes: - - ./notebook.ipynb:/app/notebook.ipynb - command: > - bash -c "pip install llama-stack-client jupyter nbconvert && - jupyter nbconvert --to python /app/notebook.ipynb && - python /app/notebook.py" - restart: "no" - + # notebook: + # image: python:3.12 + # depends_on: + # llamastack: + # condition: service_started + # network_mode: ${NETWORK_MODE:-bridge} + # volumes: + # - ./notebook.ipynb:/app/notebook.ipynb + # command: > + # bash -c "pip install llama-stack-client jupyter nbconvert && + # jupyter nbconvert --to python /app/notebook.ipynb && + # python /app/notebook.py" + # restart: "no" +networks: + llama-stack: + driver: bridge + ollama-network: + driver: bridge volumes: ollama-init: llamastack: From e6d2200634ba009b6afb9e3834d625ceb6f670bc Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:02:08 -0600 Subject: [PATCH 42/69] Is this path relevant? 
Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 5a7bcb78f..836918456 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -48,7 +48,7 @@ services: volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file - - ~/local/llama-stack/:/app/llama-stack-source + # - ~/local/llama-stack/:/app/llama-stack-source - ./run${SAFETY_MODEL:+-with-safety}.yaml:/root/run.yaml ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" From eef3730b253d3e667d7b0339d7a760b0d5b109f9 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:05:22 -0600 Subject: [PATCH 43/69] INFERENCE_MODEL default Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 836918456..9b6baa654 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -29,7 +29,7 @@ services: - ollama environment: - OLLAMA_HOST=ollama - - INFERENCE_MODEL=${INFERENCE_MODEL} + - INFERENCE_MODEL=${INFERENCE_MODEL:-llama3.2:3b-instruct-fp16} - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama From 537f95047b8e9ed3d7ea638dbfe2a92a830ecf94 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:07:55 -0600 Subject: [PATCH 44/69] unused network Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 -- 1 file changed, 2 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 9b6baa654..326922f26 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -78,8 +78,6 @@ services: # python /app/notebook.py" # restart: "no" networks: - llama-stack: - driver: bridge ollama-network: driver: bridge volumes: From 0c2d9d9dfb3bbce51fa71d22f956c206d9efebe9 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:16:04 -0600 Subject: [PATCH 45/69] host:port instead of URL in run.yaml Signed-off-by: Teo --- examples/llama_stack_client_examples/run.yaml | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/run.yaml b/examples/llama_stack_client_examples/run.yaml index 63729c0a1..fd5fb95c6 100644 --- a/examples/llama_stack_client_examples/run.yaml +++ b/examples/llama_stack_client_examples/run.yaml @@ -13,7 +13,8 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - url: ${env.OLLAMA_URL:http://localhost:11434} + host: ollama + port: 11434 memory: - provider_id: faiss provider_type: inline::faiss From 68218d26ca4c534b2fee2077f10dd7f9fd52b398 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 11:26:25 -0600 Subject: [PATCH 46/69] seems fixed Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 14 +++++++++----- 1 file changed, 9 insertions(+), 5 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 326922f26..526678bc9 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -1,8 +1,8 @@ services: ollama: hostname: ollama - 
# extra_hosts: - # - "host.docker.internal:host-gateway" + extra_hosts: + - "host.docker.internal:host-gateway" image: ollama/ollama:latest volumes: - ~/.ollama:/root/.ollama @@ -16,7 +16,7 @@ services: reservations: memory: 2G # Set minimum memory reservation healthcheck: - test: ["CMD", "sh", "-c", "echo -e \"GET / HTTP/1.1\\r\\nHost: localhost\\r\\n\\r\\n\" | nc localhost 11434 | grep -q \"HTTP/1.1 200\""] + test: ["CMD", "bash", "-c", " Date: Tue, 3 Dec 2024 11:33:19 -0600 Subject: [PATCH 47/69] on non Apple silicon: must try between llamastack-local-cpu and distirbution-ollama images Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 526678bc9..08fc88f53 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -44,8 +44,7 @@ services: condition: service_healthy ollama-init: condition: service_started - image: ${LLAMA_STACK_IMAGE:-llamastack/llamastack-local-cpu} - platform: linux/arm64 + image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} volumes: - ~/.llama:/root/.llama # Link to ollama run.yaml file From 3f7fc6800c1b09402c382d7ccd3a903f8a0036c5 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 12:01:10 -0600 Subject: [PATCH 48/69] providers.config.url | ollama HOST Signed-off-by: Teo --- examples/llama_stack_client_examples/run.yaml | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/examples/llama_stack_client_examples/run.yaml b/examples/llama_stack_client_examples/run.yaml index fd5fb95c6..4d148ad95 100644 --- a/examples/llama_stack_client_examples/run.yaml +++ b/examples/llama_stack_client_examples/run.yaml @@ -13,8 +13,7 @@ providers: - provider_id: ollama provider_type: remote::ollama config: - host: ollama - port: 11434 + url: ${env.OLLAMA_URL:http://ollama:11434} memory: - provider_id: faiss provider_type: inline::faiss From 95878e253d7b6b0780938182c8a1eb80d47568aa Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 12:33:28 -0600 Subject: [PATCH 49/69] save Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 08fc88f53..6ef8d394c 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -53,7 +53,7 @@ services: ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" environment: - - INFERENCE_MODEL=${INFERENCE_MODEL} + - INFERENCE_MODEL=${INFERENCE_MODEL:-} # ? 
- SAFETY_MODEL=${SAFETY_MODEL:-} - OLLAMA_URL=http://ollama:11434 entrypoint: > From 42e4c4fee1ffc4bb8b6fc30c9b3a3bd83123d646 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 13:11:34 -0600 Subject: [PATCH 50/69] env.tpl Signed-off-by: Teo --- examples/llama_stack_client_examples/.env.tpl | 5 +++++ 1 file changed, 5 insertions(+) create mode 100644 examples/llama_stack_client_examples/.env.tpl diff --git a/examples/llama_stack_client_examples/.env.tpl b/examples/llama_stack_client_examples/.env.tpl new file mode 100644 index 000000000..06ef2065d --- /dev/null +++ b/examples/llama_stack_client_examples/.env.tpl @@ -0,0 +1,5 @@ +INFERENCE_MODEL=meta-llama/Llama-3.2-1B +OLLAMA_MODEL=llama3.2:1b-instruct-fp16 + + + From a3a81fcf90a0a43f5ab67b16bb734ebdf25ca875 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 13:11:44 -0600 Subject: [PATCH 51/69] right configs Signed-off-by: Teo --- examples/llama_stack_client_examples/compose.yaml | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index 6ef8d394c..d542d93e7 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -29,7 +29,7 @@ services: - ollama environment: - OLLAMA_HOST=ollama - - INFERENCE_MODEL=${INFERENCE_MODEL:-llama3.2:latest} + - INFERENCE_MODEL=${OLLAMA_MODEL:-llama3.2:3b-instruct-fp16} - SAFETY_MODEL=${SAFETY_MODEL:-} volumes: - ~/.ollama:/root/.ollama @@ -43,7 +43,7 @@ services: ollama: condition: service_healthy ollama-init: - condition: service_started + condition: service_completed_successfully image: ${LLAMA_STACK_IMAGE:-llamastack/distribution-ollama} volumes: - ~/.llama:/root/.llama @@ -53,7 +53,7 @@ services: ports: - "${LLAMA_STACK_PORT:-5001}:${LLAMA_STACK_PORT:-5001}" environment: - - INFERENCE_MODEL=${INFERENCE_MODEL:-} # ? + - INFERENCE_MODEL=${INFERENCE_MODEL:-meta-llama/Llama-3.2-3B-Instruct} # ? - SAFETY_MODEL=${SAFETY_MODEL:-} - OLLAMA_URL=http://ollama:11434 entrypoint: > From ebb2ea1566b2692868d0a381032e2debb6276509 Mon Sep 17 00:00:00 2001 From: Teo Date: Tue, 3 Dec 2024 13:18:29 -0600 Subject: [PATCH 52/69] DONE Signed-off-by: Teo --- .../llama_stack_client_examples/README.md | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index 54efd34f4..d558f7e2c 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -15,19 +15,25 @@ docker compose up | Variable | Description | Default | |----------|-------------|---------| | `LLAMA_STACK_PORT` | Server port | 5001 | -| `INFERENCE_MODEL` | Model ID | meta-llama/Llama-3.2-3B-Instruct | +| `INFERENCE_MODEL` | Model ID (must match Llama Stack format) | meta-llama/Llama-3.2-1B-Instruct | +| `OLLAMA_MODEL` | Ollama model ID (must match Ollama format) | llama3.2:1b-instruct-fp16 | +| ⚠️ **Important:** | The model IDs must match their respective formats - Ollama and Llama Stack use different naming conventions for the same models | - | | `SAFETY_MODEL` | Optional safety model | - | | `NETWORK_MODE` | Docker network mode | auto-configured | | `OLLAMA_URL` | Ollama API URL | auto-configured | -## Notes +## Common Gotchas + +1. Model naming conventions differ between Ollama and Llama Stack. 
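As a rough illustration (the values mirror the `.env.tpl` and compose defaults used elsewhere in this series, so treat them as an example rather than required settings), the two naming schemes can be pinned side by side in the environment that `docker compose up` reads:

```bash
# Llama Stack naming, consumed by the llamastack service
INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct
# Ollama naming for the same weights, consumed by ollama-init
OLLAMA_MODEL=llama3.2:1b-instruct-fp16
```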
The same model is referenced differently - for instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama. + +2. Ensure Docker has sufficient system memory allocation to run properly ``` llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list ┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ ┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ ┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ -│ meta-llama/Llama-3.2-3B-Instruct │ ollama │ llama3.2:3b-instruct-fp16 │ │ +│ meta-llama/Llama-3.2-1B-Instruct │ ollama │ llama3.2:1b-instruct-fp16 │ │ └──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ ``` @@ -42,6 +48,14 @@ llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list ## References +- [Download Ollama](https://ollama.com/) - [Llama Stack Fireworks](./llama_stack_fireworks/README.fireworks.md) - [Llama Stack Docs](https://llama-stack.readthedocs.io) -- [Ollama](https://ollama.com/) +- [Ollama Run YAML Template](https://github.com/meta-llama/llama-stack/blob/main/llama_stack/templates/ollama/run.yaml) +- [Llama Stack Documentation](https://llama-stack.readthedocs.io) +- [Llama Stack Client Python](https://github.com/meta-llama/llama-stack-client-python) +- [Llama Stack Repository](https://github.com/meta-llama/llama-stack) +- [Meta Models Documentation](https://www.llama.com/docs/getting_the_models/meta/) +- [Getting Started Guide](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) +- [Agents Example](https://github.com/meta-llama/llama-stack-apps/blob/main/examples/agents/hello.py) +- [Model Download Reference](https://llama-stack.readthedocs.io/en/latest/references/llama_cli_reference/download_models.html) From 77a98a3d29c7d159bb60c1492626daa1222c16cb Mon Sep 17 00:00:00 2001 From: tad dy Date: Tue, 3 Dec 2024 18:36:52 -0500 Subject: [PATCH 53/69] pushing enhancement before merge --- agentops/llms/llama_stack_client.py | 2 +- .../compose.fireworks.yaml | 16 -- .../fireworks.config.yaml | 59 ----- .../notebook.ipynb | 205 +++++++++--------- .../llama_stack_client_canary/agent_canary.py | 6 +- .../inference_canary_2.py | 4 +- tests/llama_stack/test_llama_stack.py | 80 +++---- 7 files changed, 141 insertions(+), 231 deletions(-) delete mode 100644 examples/llama_stack_client_examples/compose.fireworks.yaml delete mode 100644 examples/llama_stack_client_examples/fireworks.config.yaml diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index e5c2c1559..ab2f1e356 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -60,7 +60,7 @@ def handle_stream_chunk(chunk: dict): {"content": message.content, "role": message.role} for message in kwargs["messages"] ] llm_event.agent_id = check_call_stack_for_agent_id() - llm_event.model = metadata.get("model_id", "Unable to identify model") + llm_event.model = kwargs["model_id"] llm_event.prompt_tokens = None llm_event.completion = accum_delta or kwargs["completion"] llm_event.completion_tokens = None diff --git a/examples/llama_stack_client_examples/compose.fireworks.yaml b/examples/llama_stack_client_examples/compose.fireworks.yaml deleted file mode 100644 index 4eb5aff0b..000000000 --- a/examples/llama_stack_client_examples/compose.fireworks.yaml +++ /dev/null @@ -1,16 +0,0 @@ -services: - llamastack: - image: 
llamastack/distribution-fireworks - network_mode: "host" - volumes: - - ~/.llama:/root/.llama - - ./run.yaml:/root/run.yaml - ports: - - "5000:5000" - entrypoint: bash -c "python -m llama_stack.distribution.server.server --yaml_config /root/run.yaml" - deploy: - restart_policy: - condition: on-failure - delay: 3s - max_attempts: 5 - window: 60s \ No newline at end of file diff --git a/examples/llama_stack_client_examples/fireworks.config.yaml b/examples/llama_stack_client_examples/fireworks.config.yaml deleted file mode 100644 index cb9dd2cbc..000000000 --- a/examples/llama_stack_client_examples/fireworks.config.yaml +++ /dev/null @@ -1,59 +0,0 @@ -version: '2' -image_name: fireworks -docker_image: null -conda_env: fireworks -apis: -- agents -- inference -- memory -- safety -- telemetry -providers: - inference: - - provider_id: fireworks - provider_type: remote::fireworks - config: - url: "https://api.fireworks.ai/inference" - api_key: "" - memory: - - provider_id: faiss - provider_type: inline::faiss - config: - kvstore: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/faiss_store.db - safety: - - provider_id: llama-guard - provider_type: inline::llama-guard - config: {} - agents: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: - persistence_store: - type: sqlite - namespace: null - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/agents_store.db - telemetry: - - provider_id: meta-reference - provider_type: inline::meta-reference - config: {} -metadata_store: - namespace: null - type: sqlite - db_path: ${env.SQLITE_STORE_DIR:~/.llama/distributions/fireworks}/registry.db -models: -- metadata: {} - model_id: meta-llama/Llama-3.2-1B-Instruct - provider_id: null - provider_model_id: fireworks/llama-v3p2-1b-instruct -shields: -- params: null - shield_id: meta-llama/Llama-Guard-3-8B - provider_id: null - provider_shield_id: null -memory_banks: [] -datasets: [] -scoring_fns: [] -eval_tasks: [] \ No newline at end of file diff --git a/examples/llama_stack_client_examples/notebook.ipynb b/examples/llama_stack_client_examples/notebook.ipynb index f2b2fc5c8..4f8b96e68 100644 --- a/examples/llama_stack_client_examples/notebook.ipynb +++ b/examples/llama_stack_client_examples/notebook.ipynb @@ -19,22 +19,31 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [], + "outputs": [ + { + "ename": "SyntaxError", + "evalue": "invalid syntax (2472932708.py, line 1)", + "output_type": "error", + "traceback": [ + "\u001b[0;36m Cell \u001b[0;32mIn[1], line 1\u001b[0;36m\u001b[0m\n\u001b[0;31m python -m venv .venv\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" + ] + } + ], "source": [ - "python -m venv .venv\n", - "source .venv/bin/activate" + "# python -m venv .venv\n", + "# source .venv/bin/activate" ] }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 2, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", "Requirement already 
satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", @@ -67,13 +76,13 @@ "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.56)\n", + "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", "Requirement already satisfied: blobfile in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.0)\n", "Requirement already satisfied: fire in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.7.0)\n", "Requirement already satisfied: httpx in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.27.2)\n", "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.26.3)\n", - "Requirement already satisfied: llama-models>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", - "Requirement already satisfied: llama-stack-client>=0.0.56 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.56)\n", + "Requirement already satisfied: llama-models>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", + "Requirement already satisfied: llama-stack-client>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.48)\n", "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (1.0.1)\n", "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.10.1)\n", @@ -81,18 +90,18 @@ "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (13.9.4)\n", "Requirement already satisfied: setuptools in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (75.6.0)\n", "Requirement already satisfied: termcolor in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.5.0)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) 
(3.1.4)\n", - "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.56->llama-stack) (11.0.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (2.2.3)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (24.9.0)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.56->llama-stack) (4.12.2)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (6.0.2)\n", + "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (3.1.4)\n", + "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (0.8.0)\n", + "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (11.0.0)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.6.2.post1)\n", + "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (1.9.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (2.2.3)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (24.9.0)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
llama-stack-client>=0.0.57->llama-stack) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.67.0)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.12.2)\n", "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (2024.8.30)\n", "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (1.0.7)\n", "Requirement already satisfied: idna in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (3.10)\n", @@ -110,13 +119,13 @@ "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (3.0.0)\n", "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (2.18.0)\n", "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.56->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.56->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.56->llama-stack) (2024.11.6)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.56->llama-stack) (1.16.0)\n", + "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.57->llama-stack) (3.0.2)\n", + "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", + "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.57->llama-stack) (2024.11.6)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.57->llama-stack) (1.16.0)\n", "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", @@ -152,6 +161,27 @@ "\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", + "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (4.12.2)\n", + "Requirement already 
satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", + "\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", + "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", "Note: you may need to restart the kernel to use updated packages.\n" ] } @@ -161,7 +191,9 @@ "%pip install -U llama-stack\n", "%pip install -U agentops\n", "%pip install -U python-dotenv\n", - "%pip install -U fastapi\n" + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" ] }, { @@ -173,7 +205,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": 3, "metadata": {}, "outputs": [], "source": [ @@ -190,7 +222,7 @@ "load_dotenv()\n", "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", "\n", - "# agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", "\n", "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", "port = 5001 # LLAMA_STACK_PORT\n", @@ -206,40 +238,36 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Completion Example" + "# Inference Canary 1 - Completion with Streaming" ] }, { "cell_type": "code", - "execution_count": 5, + "execution_count": 4, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=5f22f2fd-2561-4b8d-8d8c-1ae875d8075c\u001b[0m\u001b[0m\n" + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" ] }, { - "ename": "InternalServerError", - "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/agentops/llms/llama_stack_client.py:207\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 205\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 206\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 207\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 208\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 
275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", - "File 
\u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in 
\u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", - "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m 
poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", + "\n", + "\u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[33m of\u001b[0m\u001b[33m night\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" ] } ], @@ -248,15 +276,17 @@ "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", - " content=\"write me a 3 word poem about the moon\",\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", " role=\"user\",\n", " ),\n", " ],\n", " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - " stream=False\n", + " stream=True\n", ")\n", "\n", - "print(f\"> Response: {response}\")\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", "agentops.end_session(\"Success\")" ] }, @@ -264,76 +294,49 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "## Completion with Streaming Example" + "# Inference Canary Example 2 - Completion without Streaming" ] }, { "cell_type": "code", - "execution_count": 6, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", - "\n", - "\u001b[0m\u001b[33m\"L\u001b[0m\u001b[33munar\u001b[0m\u001b[33m Gentle\u001b[0m\u001b[33m Glow\u001b[0m\u001b[33m\"\u001b[0m\u001b[97m\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 2.1s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9a70187c-87c5-4e7b-bb63-68e303df041e\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", " messages=[\n", " UserMessage(\n", - " content=\"hello world, write me a 3 word poem about the moon\",\n", + " content=\"write me a 3 word poem about the moon\",\n", " role=\"user\",\n", " ),\n", " ],\n", " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", - " stream=True\n", + " stream=False\n", ")\n", "\n", - "async for log in EventLogger().log(response):\n", - " log.print()\n", - "\n", + "print(f\"> Response: {response}\")\n", "agentops.end_session(\"Success\")" ] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Agent Canary Example" + ] + }, { "cell_type": "code", - "execution_count": null, + "execution_count": 11, "metadata": {}, "outputs": [ - { - 
"name": "stdout", - "output_type": "stream", - "text": [ - "_!_!_ LlamaStackClientProvider _!_!_\n", - "_!_!_ override _!_!_\n" - ] - }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" + "🖇 AgentOps: AgentOps has already been initialized. If you are trying to start a session, call agentops.start_session() instead.\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" ] }, { @@ -342,16 +345,16 @@ "text": [ "No available shields. Disable safety.\n", "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.async_generator at 0x10ee067a0>\n", - "response=.async_generator at 0x10ee70900>\n" + "response=.async_generator at 0x1304e75a0>\n", + "response=.async_generator at 0x1304e7140>\n" ] }, { "name": "stderr", "output_type": "stream", "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=cd9b9c7f-4335-49f8-ae8a-71a625f0eb06\u001b[0m\u001b[0m\n" + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" ] } ], diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index ea0042d36..73513cfa5 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -12,9 +12,9 @@ LLAMA_STACK_PORT = 5001 INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" -import debugpy -debugpy.listen(5678) -debugpy.wait_for_client() +# import debugpy +# debugpy.listen(5678) +# debugpy.wait_for_client() agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index f18db96bf..b4a6aea3f 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -41,7 +41,7 @@ async def stream_test(): def main(): agentops.start_session() - client.inference.chat_completion( + response = client.inference.chat_completion( messages=[ UserMessage( content="hello world, write me a 3 word poem about the moon", @@ -51,6 +51,8 @@ def main(): model_id="meta-llama/Llama-3.2-1B-Instruct", stream=False, ) + + print(response) agentops.end_session(end_state="Success") diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py index b93a18567..47b80e767 100644 --- a/tests/llama_stack/test_llama_stack.py +++ b/tests/llama_stack/test_llama_stack.py @@ -1,57 +1,37 @@ -import pytest -import requests_mock -import time - from llama_stack_client 
import LlamaStackClient from llama_stack_client.types import UserMessage from llama_stack_client.lib.inference.event_logger import EventLogger - - -@pytest.fixture(autouse=True) -def setup_teardown(): - yield - - -@pytest.fixture(autouse=True, scope="function") -def mock_req(): - with requests_mock.Mocker() as m: - url = "http://localhost:5001" - m.post(url + "/v2/create_events", json={"status": "ok"}) - m.post(url + "/v2/create_session", json={"status": "success", "jwt": "some_jwt"}) - - yield m - +from unittest.mock import MagicMock class TestLlamaStack: def setup_method(self): - - print("...Setting up LlamaStackClient...") - - host = "0.0.0.0" # LLAMA_STACK_HOST - port = 5001 # LLAMA_STACK_PORT - - full_host = f"http://{host}:{port}" - - self.client = LlamaStackClient( - base_url=f"{full_host}", - ) - - - def test_llama_stack_inference(self, mock_req): - - response = self.client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-1B-Instruct", - stream=False, + self.client = LlamaStackClient() + self.client.inference = MagicMock() + self.client.inference.chat_completion = MagicMock(return_value=[ + { + "choices": [ + { + "message": { + "content": "Moonlight whispers softly", + "role": "assistant", + } + } + ] + } + ]) + + + def test_llama_stack_inference(self): + self.client.inference.chat_completion.assert_not_called() + self.client.inference.chat_completion( + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, ) - - # async for log in EventLogger().log(response): - # log.print() - - print(response) - + self.client.inference.chat_completion.assert_called_once() + \ No newline at end of file From c77339e36cf9754091a9e86afb89fe741ae10c12 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 18:01:40 +0530 Subject: [PATCH 54/69] add `Instruct` to model name --- examples/llama_stack_client_examples/.env.tpl | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/examples/llama_stack_client_examples/.env.tpl b/examples/llama_stack_client_examples/.env.tpl index 06ef2065d..5099720e1 100644 --- a/examples/llama_stack_client_examples/.env.tpl +++ b/examples/llama_stack_client_examples/.env.tpl @@ -1,4 +1,4 @@ -INFERENCE_MODEL=meta-llama/Llama-3.2-1B +INFERENCE_MODEL=meta-llama/Llama-3.2-1B-Instruct OLLAMA_MODEL=llama3.2:1b-instruct-fp16 From 2539b7b2c7d5fe64d0398552033ed98e04d4a55b Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 18:02:08 +0530 Subject: [PATCH 55/69] clean and increase memory --- .../llama_stack_client_examples/compose.yaml | 21 +++---------------- 1 file changed, 3 insertions(+), 18 deletions(-) diff --git a/examples/llama_stack_client_examples/compose.yaml b/examples/llama_stack_client_examples/compose.yaml index d542d93e7..2139a6620 100644 --- a/examples/llama_stack_client_examples/compose.yaml +++ b/examples/llama_stack_client_examples/compose.yaml @@ -12,9 +12,9 @@ services: deploy: resources: limits: - memory: 4G # Set maximum memory + memory: 8G reservations: - memory: 2G # Set minimum memory reservation + memory: 4G healthcheck: test: ["CMD", "bash", "-c", " @@ -64,19 +62,6 @@ services: delay: 10s max_attempts: 3 window: 60s - # notebook: - # image: python:3.12 - # depends_on: - # llamastack: - # condition: service_started - # network_mode: ${NETWORK_MODE:-bridge} - # volumes: 
- # - ./notebook.ipynb:/app/notebook.ipynb - # command: > - # bash -c "pip install llama-stack-client jupyter nbconvert && - # jupyter nbconvert --to python /app/notebook.ipynb && - # python /app/notebook.py" - # restart: "no" networks: - ollama-network From d541d436f60a2d21ac99bb43abfa52b5d9a50101 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:16:42 +0530 Subject: [PATCH 56/69] test cleanup --- .../providers/llama_stack_client_canary/agent_canary.py | 4 ++-- .../llama_stack_client_canary/inference_canary_1.py | 4 ---- .../llama_stack_client_canary/inference_canary_2.py | 6 +----- 3 files changed, 3 insertions(+), 11 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 73513cfa5..ab6399cbc 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -78,8 +78,8 @@ async def agent_test(): print("Response: ", response) - async for log in EventLogger().log(response): - log.print() + # for log in EventLogger().log(response): + # log.print() agentops.start_session() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index 66717f898..afbe48ff8 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -10,10 +10,6 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() - host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index b4a6aea3f..03a3f51ab 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -9,10 +9,6 @@ agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() - host = "0.0.0.0" # LLAMA_STACK_HOST port = 5001 # LLAMA_STACK_PORT @@ -52,7 +48,7 @@ def main(): stream=False, ) - print(response) + print(response.completion_message.content) agentops.end_session(end_state="Success") From f9a6b0700f5dc09a7fef05b415aba8c1c9d85016 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:18:09 +0530 Subject: [PATCH 57/69] clean notebook --- .../notebook.ipynb | 245 +----------------- 1 file changed, 13 insertions(+), 232 deletions(-) diff --git a/examples/llama_stack_client_examples/notebook.ipynb b/examples/llama_stack_client_examples/notebook.ipynb index 4f8b96e68..6dda032f3 100644 --- a/examples/llama_stack_client_examples/notebook.ipynb +++ b/examples/llama_stack_client_examples/notebook.ipynb @@ -19,173 +19,7 @@ "cell_type": "code", "execution_count": null, "metadata": {}, - "outputs": [ - { - "ename": "SyntaxError", - "evalue": "invalid syntax (2472932708.py, line 1)", - "output_type": "error", - "traceback": [ - "\u001b[0;36m Cell \u001b[0;32mIn[1], line 
1\u001b[0;36m\u001b[0m\n\u001b[0;31m python -m venv .venv\u001b[0m\n\u001b[0m ^\u001b[0m\n\u001b[0;31mSyntaxError\u001b[0m\u001b[0;31m:\u001b[0m invalid syntax\n" - ] - } - ], - "source": [ - "# python -m venv .venv\n", - "# source .venv/bin/activate" - ] - }, - { - "cell_type": "code", - "execution_count": 2, - "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (0.27.2)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (2.10.1)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack-client) (2.18.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: llama-stack in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.0.57)\n", - "Requirement already satisfied: blobfile in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.0)\n", - "Requirement already satisfied: fire in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.7.0)\n", - "Requirement already satisfied: httpx in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.27.2)\n", - "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.26.3)\n", - "Requirement already satisfied: llama-models>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", - "Requirement already satisfied: llama-stack-client>=0.0.57 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (0.0.57)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (3.0.48)\n", - "Requirement already satisfied: 
python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (1.0.1)\n", - "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.10.1)\n", - "Requirement already satisfied: requests in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.32.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (13.9.4)\n", - "Requirement already satisfied: setuptools in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (75.6.0)\n", - "Requirement already satisfied: termcolor in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack) (2.5.0)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (3.1.4)\n", - "Requirement already satisfied: tiktoken in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-models>=0.0.57->llama-stack) (11.0.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.6.2.post1)\n", - "Requirement already satisfied: click in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (2.2.3)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (24.9.0)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.67.0)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from llama-stack-client>=0.0.57->llama-stack) (4.12.2)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (1.0.7)\n", - "Requirement already satisfied: idna in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpx->llama-stack) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.21.0)\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (2.2.3)\n", - "Requirement already satisfied: lxml>=4.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (5.3.0)\n", - "Requirement already satisfied: filelock>=3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from blobfile->llama-stack) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from huggingface-hub->llama-stack) (23.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests->llama-stack) (3.4.0)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from rich->llama-stack) (2.18.0)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from jinja2->llama-models>=0.0.57->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.26.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pandas->llama-stack-client>=0.0.57->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from tiktoken->llama-models>=0.0.57->llama-stack) (2024.11.6)\n", - "Requirement already 
satisfied: six>=1.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.57->llama-stack) (1.16.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: agentops in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.3.18)\n", - "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.32.3)\n", - "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (5.9.8)\n", - "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (23.2)\n", - "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (2.5.0)\n", - "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from agentops) (6.0.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: python-dotenv in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.0.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: fastapi in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (0.115.5)\n", - "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in 
/Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (0.41.3)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (2.10.1)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from fastapi) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.6.2.post1)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (1.28.2)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from 
opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/11_2024/agentops/venv/lib/python3.12/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m A new release of pip is available: \u001b[0m\u001b[31;49m24.0\u001b[0m\u001b[39;49m -> \u001b[0m\u001b[32;49m24.3.1\u001b[0m\n", - "\u001b[1m[\u001b[0m\u001b[34;49mnotice\u001b[0m\u001b[1;39;49m]\u001b[0m\u001b[39;49m To update, run: \u001b[0m\u001b[32;49mpip install --upgrade pip\u001b[0m\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -243,34 +77,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mHere\u001b[0m\u001b[33m is\u001b[0m\u001b[33m a\u001b[0m\u001b[33m \u001b[0m\u001b[33m3\u001b[0m\u001b[33m-word\u001b[0m\u001b[33m poem\u001b[0m\u001b[33m about\u001b[0m\u001b[33m the\u001b[0m\u001b[33m moon\u001b[0m\u001b[33m:\n", - "\n", - "\u001b[0m\u001b[33mSilver\u001b[0m\u001b[33m glowing\u001b[0m\u001b[33m orb\u001b[0m\u001b[33m of\u001b[0m\u001b[33m night\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 5.3s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=9755b642-26e5-49ac-8371-3ff0b871a001\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -315,7 +124,7 @@ " stream=False\n", ")\n", "\n", - "print(f\"> Response: {response}\")\n", + "print(f\"> Response: {response.completion_message.content}\")\n", "agentops.end_session(\"Success\")" ] }, @@ -328,36 +137,9 @@ }, { "cell_type": "code", - "execution_count": 11, + "execution_count": null, 
"metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: AgentOps has already been initialized. If you are trying to start a session, call agentops.start_session() instead.\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.async_generator at 0x1304e75a0>\n", - "response=.async_generator at 0x1304e7140>\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 0.5s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=0b54e8e9-dc76-447e-b1f5-94583dbe124a\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from llama_stack_client import LlamaStackClient\n", @@ -365,15 +147,12 @@ "from llama_stack_client.lib.agents.event_logger import EventLogger\n", "from llama_stack_client.types.agent_create_params import AgentConfig\n", "\n", - "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", - "\n", - "# Apply nest_asyncio to handle nested event loops\n", - "# nest_asyncio.apply()\n", + "agentops.start_session()\n", "\n", "LLAMA_STACK_PORT = 5001\n", "\n", "# Replace with actual API keys for functionality\n", - "os.environ[\"BRAVE_SEARCH_API_KEY\"] = \"your-brave-search-api-key\"\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", "\n", "async def agent_test():\n", " client = LlamaStackClient(\n", @@ -404,7 +183,7 @@ " {\n", " \"type\": \"brave_search\",\n", " \"engine\": \"brave\",\n", - " \"api_key\": os.getenv(\"BRAVE_SEARCH_API_KEY\"),\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", " }\n", " ],\n", " tool_choice=\"auto\",\n", @@ -434,7 +213,7 @@ "\n", " print(f\"{response=}\")\n", "\n", - " # async for log in EventLogger().log(response):\n", + " # for log in EventLogger().log(response):\n", " # log.print()\n", "\n", "agentops.start_session()\n", @@ -449,7 +228,9 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "agentops.end_all_sessions()" + ] } ], "metadata": { From 51847e7d0e31f76e638476d17ec36916d375d466 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:20:58 +0530 Subject: [PATCH 58/69] updated examples readme and notebook renamed --- .../llama_stack_client_examples/README.md | 28 ++++--------------- ...tebook.ipynb => llama_stack_example.ipynb} | 0 2 files changed, 6 insertions(+), 22 deletions(-) rename examples/llama_stack_client_examples/{notebook.ipynb => llama_stack_example.ipynb} (100%) diff --git a/examples/llama_stack_client_examples/README.md b/examples/llama_stack_client_examples/README.md index d558f7e2c..c838096d5 100644 --- a/examples/llama_stack_client_examples/README.md +++ b/examples/llama_stack_client_examples/README.md @@ -1,15 +1,17 @@ # Llama Stack Client Examples -Run Llama Stack with Ollama - either local or containerized. +The example notebook demonstrates how to use the Llama Stack Client to monitor an Agentic application using AgentOps. 
We have also provided a `compose.yaml` file to run Ollama in a container. ## Quick Start -Just run: +First run the following command to start the Ollama server with the Llama Stack client: ```bash docker compose up ``` +Next, run the [notebook](./llama_stack_example.ipynb) to see the waterfall visualization in the [AgentOps](https://app.agentops.ai) dashboard. + ## Environment Variables | Variable | Description | Default | @@ -17,34 +19,16 @@ docker compose up | `LLAMA_STACK_PORT` | Server port | 5001 | | `INFERENCE_MODEL` | Model ID (must match Llama Stack format) | meta-llama/Llama-3.2-1B-Instruct | | `OLLAMA_MODEL` | Ollama model ID (must match Ollama format) | llama3.2:1b-instruct-fp16 | -| ⚠️ **Important:** | The model IDs must match their respective formats - Ollama and Llama Stack use different naming conventions for the same models | - | | `SAFETY_MODEL` | Optional safety model | - | | `NETWORK_MODE` | Docker network mode | auto-configured | | `OLLAMA_URL` | Ollama API URL | auto-configured | ## Common Gotchas -1. Model naming conventions differ between Ollama and Llama Stack. The same model is referenced differently - for instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama. - -2. Ensure Docker has sufficient system memory allocation to run properly - -``` -llama-stack-client --endpoint http://localhost:$LLAMA_STACK_PORT models list -┏━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━━━━┳━━━━━━━━━━━━━━━━━━━━━━━━━━━┳━━━━━━━━━━┓ -┃ identifier ┃ provider_id ┃ provider_resource_id ┃ metadata ┃ -┡━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━━━━╇━━━━━━━━━━━━━━━━━━━━━━━━━━━╇━━━━━━━━━━┩ -│ meta-llama/Llama-3.2-1B-Instruct │ ollama │ llama3.2:1b-instruct-fp16 │ │ -└──────────────────────────────────┴─────────────┴───────────────────────────┴──────────┘ -``` +1. Model naming conventions differ between Ollama and Llama Stack. The same model is referenced differently. For instance, `meta-llama/Llama-3.2-1B-Instruct` in Llama Stack corresponds to `llama3.2:1b-instruct-fp16` in Ollama. -2. Docker needs sufficient memory allocation +2. Ensure Docker is configured with sufficient system memory allocation to run properly. -3. Ollama commands: - ```bash - ollama list - ollama help - ollama ps - ``` ## References diff --git a/examples/llama_stack_client_examples/notebook.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb similarity index 100% rename from examples/llama_stack_client_examples/notebook.ipynb rename to examples/llama_stack_client_examples/llama_stack_example.ipynb From 81651fab31a654db5c0a37b160b4c158245ae0dc Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:30:30 +0530 Subject: [PATCH 59/69] updated docs --- docs/v1/integrations/llama_stack.mdx | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) diff --git a/docs/v1/integrations/llama_stack.mdx b/docs/v1/integrations/llama_stack.mdx index 163a4ca8e..bb0f9a83c 100644 --- a/docs/v1/integrations/llama_stack.mdx +++ b/docs/v1/integrations/llama_stack.mdx @@ -1,14 +1,14 @@ --- title: 'Llama Stack' -description: '[Llama Stack](https://llama-stack.readthedocs.io/) is a framework for building Agentic applications.' +description: 'Llama Stack is a framework from Meta AI for building Agentic applications.' 
--- import CodeTooltip from '/snippets/add-code-tooltip.mdx' import EnvTooltip from '/snippets/add-env-tooltip.mdx' -AgentOps has built an integration with Llama Stack to make monitoring applications that leverage [llama-stack-client-python](https://github.com/meta-llama/llama-stack-client-python) simple. +AgentOps integrates with Llama Stack via its Python [client](https://github.com/meta-llama/llama-stack-client-python) to provide observability into applications that leverage it. -Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. +Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io/) available as well as a great [quickstart](https://llama-stack.readthedocs.io/en/latest/getting_started/index.html) guide. You can use this guide to set up the Llama Stack server and client, or alternatively use our Docker [compose](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/docker-compose.yml) file. ## Adding AgentOps to Llama Stack applications @@ -23,7 +23,17 @@ Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io ``` - + + + ```bash pip + pip install llama-stack-client + ``` + ```bash poetry + poetry add llama-stack-client + ``` + + + @@ -51,12 +61,9 @@ Llama Stack has comprehensive [documentation](https://llama-stack.readthedocs.io -## Llama Stack + AgentOps Examples +## Examples - - - - +An example notebook is available [here](https://github.com/AgentOps-AI/agentops/blob/main/examples/llama_stack_client_examples/llama_stack_example.ipynb) to showcase how to use the Llama Stack client with AgentOps. From d76b784ad590c2c18f240a75f672a8ff08cbccdb Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:32:51 +0530 Subject: [PATCH 60/69] clean integration code --- agentops/llms/llama_stack_client.py | 17 ----------------- 1 file changed, 17 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index ab2f1e356..19429999a 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -34,15 +34,9 @@ def handle_stream_chunk(chunk: dict): nonlocal stack # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - # if llm_event.returns is None: - # llm_event.returns = chunk.event try: nonlocal accum_delta - # llm_event.agent_id = check_call_stack_for_agent_id() - # llm_event.model = kwargs["model_id"] - # llm_event.prompt = kwargs["messages"] if chunk.event.event_type == "start": llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) @@ -81,8 +75,6 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionResponseStreamChunkEvent and accumulate the deltas from all subsequent chunks to build one full chat completion - # llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) # nonlocal llm_event nonlocal stack @@ -90,8 +82,6 @@ def handle_stream_agent(chunk: dict): if session is not None: llm_event.session_id = session.session_id - # if getattr(llm_event, 'returns', None): - # llm_event.returns = chunk.event try: if chunk.event.payload.event_type == "turn_start":
logger.debug("turn_start") @@ -126,9 +116,7 @@ def handle_stream_agent(chunk: dict): "event": tool_event }) - # self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): - # print('ToolExecution - in_progress') nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: @@ -141,7 +129,6 @@ def handle_stream_agent(chunk: dict): tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() - # tool_event.name = "ToolExecution - success" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): @@ -149,7 +136,6 @@ def handle_stream_agent(chunk: dict): if stack[-1]['event_type'] == "ToolExecution - started": tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() - # tool_event.name = "ToolExecution - failure" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) @@ -183,9 +169,6 @@ def handle_stream_agent(chunk: dict): elif chunk.event.payload.event_type == "turn_complete": if stack[-1]['event_type'] == "turn_start": logger.debug('turn_start') - # llm_event = stack.pop() - # llm_event.end_timestamp = get_ISO_time() - # self._safe_record(session, llm_event) pass except Exception as e: From cdf33a7da5e1f7aeacfd74c25ab9930f6d5e1edc Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:34:28 +0530 Subject: [PATCH 61/69] linting --- agentops/llms/llama_stack_client.py | 119 ++++++++++++++++------------ 1 file changed, 68 insertions(+), 51 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 19429999a..6de8e0530 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -11,16 +11,18 @@ from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id from agentops.llms.instrumented_provider import InstrumentedProvider + class LlamaStackClientProvider(InstrumentedProvider): original_complete = None original_create_turn = None - def __init__(self, client): super().__init__(client) self._provider_name = "LlamaStack" - def handle_response(self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {}) -> dict: + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {} + ) -> dict: """Handle responses for LlamaStack""" try: stack = [] @@ -30,7 +32,6 @@ def handle_response(self, response, kwargs, init_timestamp, session: Optional[Se # llm_event = None def handle_stream_chunk(chunk: dict): - nonlocal stack # NOTE: prompt/completion usage not returned in response when streaming @@ -40,16 +41,15 @@ def handle_stream_chunk(chunk: dict): if chunk.event.event_type == "start": llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) - stack.append({ - 'event_type': "start", - 'event': llm_event - }) + stack.append({"event_type": "start", "event": llm_event}) accum_delta = chunk.event.delta elif chunk.event.event_type == "progress": accum_delta += chunk.event.delta elif chunk.event.event_type == "complete": - if stack[-1]['event_type'] == "start": # check if the last event in the stack is a step start event - llm_event = stack.pop().get("event") + if ( + stack[-1]["event_type"] == "start" + ): # check if the last event in the stack is a 
step start event + llm_event = stack.pop().get("event") llm_event.prompt = [ {"content": message.content, "role": message.role} for message in kwargs["messages"] ] @@ -75,7 +75,7 @@ def handle_stream_chunk(chunk: dict): def handle_stream_agent(chunk: dict): # NOTE: prompt/completion usage not returned in response when streaming - + # nonlocal llm_event nonlocal stack @@ -85,19 +85,16 @@ def handle_stream_agent(chunk: dict): try: if chunk.event.payload.event_type == "turn_start": logger.debug("turn_start") - stack.append({ - 'event_type': chunk.event.payload.event_type, - 'event': None - }) + stack.append({"event_type": chunk.event.payload.event_type, "event": None}) elif chunk.event.payload.event_type == "step_start": logger.debug("step_start") llm_event = LLMEvent(init_timestamp=get_ISO_time(), params=kwargs) - stack.append({ - 'event_type': chunk.event.payload.event_type, - 'event': llm_event - }) + stack.append({"event_type": chunk.event.payload.event_type, "event": llm_event}) elif chunk.event.payload.event_type == "step_progress": - if (chunk.event.payload.step_type == "inference" and chunk.event.payload.text_delta_model_response): + if ( + chunk.event.payload.step_type == "inference" + and chunk.event.payload.text_delta_model_response + ): nonlocal accum_delta delta = chunk.event.payload.text_delta_model_response @@ -105,50 +102,54 @@ def handle_stream_agent(chunk: dict): accum_delta += delta else: accum_delta = delta - elif (chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta): - if (chunk.event.payload.tool_call_delta.parse_status == "started"): - logger.debug('tool_started') + elif chunk.event.payload.step_type == "inference" and chunk.event.payload.tool_call_delta: + if chunk.event.payload.tool_call_delta.parse_status == "started": + logger.debug("tool_started") tool_event = ToolEvent(init_timestamp=get_ISO_time(), params=kwargs) tool_event.name = "tool_started" - stack.append({ - "event_type": "tool_started", - "event": tool_event - }) + stack.append({"event_type": "tool_started", "event": tool_event}) - elif (chunk.event.payload.tool_call_delta.parse_status == "in_progress"): + elif chunk.event.payload.tool_call_delta.parse_status == "in_progress": nonlocal accum_tool_delta delta = chunk.event.payload.tool_call_delta.content if accum_tool_delta: accum_tool_delta += delta else: accum_tool_delta = delta - elif (chunk.event.payload.tool_call_delta.parse_status == "success"): - logger.debug('ToolExecution - success') - if stack[-1]['event_type'] == "tool_started": # check if the last event in the stack is a tool execution event - + elif chunk.event.payload.tool_call_delta.parse_status == "success": + logger.debug("ToolExecution - success") + if ( + stack[-1]["event_type"] == "tool_started" + ): # check if the last event in the stack is a tool execution event tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() tool_event.params["completion"] = accum_tool_delta - self._safe_record(session, tool_event) - elif (chunk.event.payload.tool_call_delta.parse_status == "failure"): - logger.warning('ToolExecution - failure') - if stack[-1]['event_type'] == "ToolExecution - started": + self._safe_record(session, tool_event) + elif chunk.event.payload.tool_call_delta.parse_status == "failure": + logger.warning("ToolExecution - failure") + if stack[-1]["event_type"] == "ToolExecution - started": tool_event = stack.pop().get("event") tool_event.end_timestamp = get_ISO_time() tool_event.params["completion"] = accum_tool_delta - 
self._safe_record(session, ErrorEvent(trigger_event=tool_event, exception=Exception("ToolExecution - failure"))) + self._safe_record( + session, + ErrorEvent( + trigger_event=tool_event, exception=Exception("ToolExecution - failure") + ), + ) elif chunk.event.payload.event_type == "step_complete": logger.debug("Step complete event received") - - if (chunk.event.payload.step_type == "inference"): + + if chunk.event.payload.step_type == "inference": logger.debug("Step complete inference") - - if stack[-1]['event_type'] == "step_start": - llm_event = stack.pop().get("event") + + if stack[-1]["event_type"] == "step_start": + llm_event = stack.pop().get("event") llm_event.prompt = [ - {"content": message['content'], "role": message['role']} for message in kwargs["messages"] + {"content": message["content"], "role": message["role"]} + for message in kwargs["messages"] ] llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = metadata.get("model_id", "Unable to identify model") @@ -159,16 +160,16 @@ def handle_stream_agent(chunk: dict): self._safe_record(session, llm_event) else: logger.warning("Unexpected event stack state for inference step complete") - elif (chunk.event.payload.step_type == "tool_execution"): - if stack[-1]['event_type'] == "tool_started": - logger.debug('tool_complete') + elif chunk.event.payload.step_type == "tool_execution": + if stack[-1]["event_type"] == "tool_started": + logger.debug("tool_complete") tool_event = stack.pop().get("event") tool_event.name = "tool_complete" tool_event.params["completion"] = accum_tool_delta self._safe_record(session, tool_event) elif chunk.event.payload.event_type == "turn_complete": - if stack[-1]['event_type'] == "turn_start": - logger.debug('turn_start') + if stack[-1]["event_type"] == "turn_start": + logger.debug("turn_start") pass except Exception as e: @@ -183,23 +184,30 @@ def handle_stream_agent(chunk: dict): f"chunk:\n {chunk}\n" f"kwargs:\n {kwargs_str}\n" ) + if kwargs.get("stream", False): + def generator(): for chunk in response: handle_stream_chunk(chunk) yield chunk + return generator() elif inspect.isasyncgen(response): + async def async_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk + return async_generator() elif inspect.isgenerator(response): + async def async_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk + return async_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) @@ -209,7 +217,9 @@ async def async_generator(): llm_event.returns = response llm_event.agent_id = check_call_stack_for_agent_id() llm_event.model = kwargs["model_id"] - llm_event.prompt = [{"content": message.content, "role": message.role} for message in kwargs["messages"]] + llm_event.prompt = [ + {"content": message.content, "role": message.role} for message in kwargs["messages"] + ] llm_event.prompt_tokens = None llm_event.completion = response.completion_message.content llm_event.completion_tokens = None @@ -257,14 +267,19 @@ def patched_function(*args, **kwargs): session = kwargs.get("session", None) if "session" in kwargs.keys(): del kwargs["session"] - + result = self.original_create_turn(*args, **kwargs) - return self.handle_response(result, kwargs, init_timestamp, session=session, metadata={"model_id": args[0].agent_config.get("model")}) + return self.handle_response( + result, + kwargs, + init_timestamp, + session=session, + metadata={"model_id": args[0].agent_config.get("model")}, + ) # Override the original method with the 
patched one Agent.create_turn = patched_function - def override(self): self._override_complete() self._override_create_turn() @@ -272,8 +287,10 @@ def override(self): def undo_override(self): if self.original_complete is not None: from llama_stack_client.resources import InferenceResource + InferenceResource.chat_completion = self.original_complete if self.original_create_turn is not None: from llama_stack_client.lib.agents.agent import Agent + Agent.create_turn = self.original_create_turn From e66c878be63b463669156631203f02ff6f66f9d6 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sat, 7 Dec 2024 20:37:08 +0530 Subject: [PATCH 62/69] linting tests --- .../inference_canary_1.py | 2 + .../inference_canary_2.py | 4 +- tests/llama_stack/test_llama_stack.py | 43 ++++++++++--------- 3 files changed, 27 insertions(+), 22 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py index afbe48ff8..c88dfa48c 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_1.py @@ -19,6 +19,7 @@ base_url=f"{full_host}", ) + async def stream_test(): response = client.inference.chat_completion( messages=[ @@ -40,4 +41,5 @@ def main(): asyncio.run(stream_test()) agentops.end_session(end_state="Success") + main() diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py index 03a3f51ab..7c43ce510 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/inference_canary_2.py @@ -18,6 +18,7 @@ base_url=f"{full_host}", ) + async def stream_test(): response = client.inference.chat_completion( messages=[ @@ -49,7 +50,8 @@ def main(): ) print(response.completion_message.content) - + agentops.end_session(end_state="Success") + main() diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py index 47b80e767..4e5868de0 100644 --- a/tests/llama_stack/test_llama_stack.py +++ b/tests/llama_stack/test_llama_stack.py @@ -3,35 +3,36 @@ from llama_stack_client.lib.inference.event_logger import EventLogger from unittest.mock import MagicMock + class TestLlamaStack: def setup_method(self): self.client = LlamaStackClient() self.client.inference = MagicMock() - self.client.inference.chat_completion = MagicMock(return_value=[ - { - "choices": [ - { - "message": { - "content": "Moonlight whispers softly", - "role": "assistant", + self.client.inference.chat_completion = MagicMock( + return_value=[ + { + "choices": [ + { + "message": { + "content": "Moonlight whispers softly", + "role": "assistant", + } } - } - ] - } - ]) - + ] + } + ] + ) def test_llama_stack_inference(self): self.client.inference.chat_completion.assert_not_called() self.client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-1B-Instruct", - stream=False, + messages=[ + UserMessage( + content="hello world, write me a 3 word poem about the moon", + role="user", + ), + ], + model_id="meta-llama/Llama-3.2-1B-Instruct", + stream=False, ) self.client.inference.chat_completion.assert_called_once() - \ No newline at end of file From 
efeffd011b260dde2d63511143f581397ed1f404 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 7 Dec 2024 20:50:59 -0500 Subject: [PATCH 63/69] fix generator bug --- agentops/llms/llama_stack_client.py | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index 6de8e0530..be7815c3f 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -24,6 +24,7 @@ def handle_response( self, response, kwargs, init_timestamp, session: Optional[Session] = None, metadata: Optional[Dict] = {} ) -> dict: """Handle responses for LlamaStack""" + try: stack = [] accum_delta = None @@ -194,21 +195,19 @@ def generator(): return generator() elif inspect.isasyncgen(response): - - async def async_generator(): + async def agent_generator(): async for chunk in response: handle_stream_agent(chunk) yield chunk - return async_generator() + return agent_generator() elif inspect.isgenerator(response): - - async def async_generator(): - async for chunk in response: + def agent_generator(): + for chunk in response: handle_stream_agent(chunk) yield chunk - return async_generator() + return agent_generator() else: llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) if session is not None: From 27db3df82286cb26a7864014b0ebff01de6ed925 Mon Sep 17 00:00:00 2001 From: tad dy Date: Sat, 7 Dec 2024 23:49:56 -0500 Subject: [PATCH 64/69] saving working agent_canary test - works in Python notebook AND in script --- .../llama_stack_example.ipynb | 414 +++++++++++++++++- .../llama_stack_example_for_ci.ipynb | 398 +++++++++++++++++ .../llama_stack_client_canary/agent_canary.py | 24 +- 3 files changed, 802 insertions(+), 34 deletions(-) create mode 100644 examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb diff --git a/examples/llama_stack_client_examples/llama_stack_example.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb index 6dda032f3..621b692c0 100644 --- a/examples/llama_stack_client_examples/llama_stack_example.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_example.ipynb @@ -17,9 +17,206 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 1, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.57)\n", + "Collecting llama-stack-client\n", + " Downloading llama_stack_client-0.0.58-py3-none-any.whl.metadata (15 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", + "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
rich->llama-stack-client) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack-client) (2.18.0)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.17.0)\n", + "Downloading llama_stack_client-0.0.58-py3-none-any.whl (286 kB)\n", + "Installing collected packages: llama-stack-client\n", + " Attempting uninstall: llama-stack-client\n", + " Found existing installation: llama_stack_client 0.0.57\n", + " Uninstalling llama_stack_client-0.0.57:\n", + " Successfully uninstalled llama_stack_client-0.0.57\n", + "Successfully installed llama-stack-client-0.0.58\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting llama-stack\n", + " Downloading llama_stack-0.0.58-py3-none-any.whl.metadata (12 kB)\n", + "Collecting blobfile (from llama-stack)\n", + " Using cached blobfile-3.0.0-py3-none-any.whl.metadata (15 kB)\n", + "Collecting fire (from llama-stack)\n", + " Using cached fire-0.7.0.tar.gz (87 kB)\n", + " Installing build dependencies ... \u001b[?25ldone\n", + "\u001b[?25h Getting requirements to build wheel ... \u001b[?25ldone\n", + "\u001b[?25h Preparing metadata (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25hRequirement already satisfied: httpx in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.28.0)\n", + "Collecting huggingface-hub (from llama-stack)\n", + " Downloading huggingface_hub-0.26.5-py3-none-any.whl.metadata (13 kB)\n", + "Collecting llama-models>=0.0.58 (from llama-stack)\n", + " Downloading llama_models-0.0.58-py3-none-any.whl.metadata (8.2 kB)\n", + "Requirement already satisfied: llama-stack-client>=0.0.58 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.0.58)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (3.0.48)\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (1.0.1)\n", + "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.10.3)\n", + "Requirement already satisfied: requests in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.32.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (13.9.4)\n", + "Requirement already satisfied: setuptools in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (57.4.0)\n", + "Requirement already satisfied: termcolor in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.5.0)\n", + "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (6.0.2)\n", + "Collecting jinja2 (from llama-models>=0.0.58->llama-stack)\n", + " Using cached jinja2-3.1.4-py3-none-any.whl.metadata (2.6 kB)\n", + 
"Collecting tiktoken (from llama-models>=0.0.58->llama-stack)\n", + " Downloading tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (6.6 kB)\n", + "Collecting Pillow (from llama-models>=0.0.58->llama-stack)\n", + " Using cached pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl.metadata (9.1 kB)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.7.0)\n", + "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.9.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (2.2.3)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (24.9.0)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.12.2)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (1.0.7)\n", + "Requirement already satisfied: idna in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (3.10)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", + "Collecting pycryptodomex>=3.8 (from blobfile->llama-stack)\n", + " Using cached pycryptodomex-3.21.0-cp36-abi3-macosx_10_9_universal2.whl.metadata (3.4 kB)\n", + "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (2.2.3)\n", + "Collecting lxml>=4.9 (from blobfile->llama-stack)\n", + " Downloading lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl.metadata (3.8 kB)\n", + "Collecting filelock>=3.0 (from blobfile->llama-stack)\n", + " Using cached filelock-3.16.1-py3-none-any.whl.metadata (2.9 kB)\n", + "Collecting fsspec>=2023.5.0 (from huggingface-hub->llama-stack)\n", + " Using cached fsspec-2024.10.0-py3-none-any.whl.metadata (11 kB)\n", + "Requirement already satisfied: 
packaging>=20.9 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from huggingface-hub->llama-stack) (23.2)\n", + "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests->llama-stack) (3.4.0)\n", + "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (3.0.0)\n", + "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (2.18.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.58->llama-stack) (1.2.2)\n", + "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", + "Collecting MarkupSafe>=2.0 (from jinja2->llama-models>=0.0.58->llama-stack)\n", + " Using cached MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl.metadata (4.0 kB)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", + "Collecting regex>=2022.1.18 (from tiktoken->llama-models>=0.0.58->llama-stack)\n", + " Using cached regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl.metadata (40 kB)\n", + "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.58->llama-stack) (1.17.0)\n", + "Downloading llama_stack-0.0.58-py3-none-any.whl (446 kB)\n", + "Downloading llama_models-0.0.58-py3-none-any.whl (1.6 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m1.6/1.6 MB\u001b[0m \u001b[31m6.7 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached blobfile-3.0.0-py3-none-any.whl (75 kB)\n", + "Downloading huggingface_hub-0.26.5-py3-none-any.whl (447 kB)\n", + "Using cached filelock-3.16.1-py3-none-any.whl (16 kB)\n", + "Using cached fsspec-2024.10.0-py3-none-any.whl (179 kB)\n", + "Downloading lxml-5.3.0-cp310-cp310-macosx_10_9_universal2.whl (8.1 MB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m8.1/8.1 MB\u001b[0m \u001b[31m8.0 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0ma \u001b[36m0:00:01\u001b[0mm\n", + "\u001b[?25hUsing cached pycryptodomex-3.21.0-cp36-abi3-macosx_10_9_universal2.whl (2.5 MB)\n", + "Using cached jinja2-3.1.4-py3-none-any.whl (133 kB)\n", + "Using cached 
pillow-11.0.0-cp310-cp310-macosx_11_0_arm64.whl (3.0 MB)\n", + "Downloading tiktoken-0.8.0-cp310-cp310-macosx_11_0_arm64.whl (982 kB)\n", + "\u001b[2K \u001b[90m━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━━\u001b[0m \u001b[32m982.4/982.4 kB\u001b[0m \u001b[31m15.2 MB/s\u001b[0m eta \u001b[36m0:00:00\u001b[0m\n", + "\u001b[?25hUsing cached MarkupSafe-3.0.2-cp310-cp310-macosx_11_0_arm64.whl (12 kB)\n", + "Using cached regex-2024.11.6-cp310-cp310-macosx_11_0_arm64.whl (284 kB)\n", + "Building wheels for collected packages: fire\n", + " Building wheel for fire (pyproject.toml) ... \u001b[?25ldone\n", + "\u001b[?25h Created wheel for fire: filename=fire-0.7.0-py3-none-any.whl size=114249 sha256=28249a5b845d2594cddd5e302164aa8818158be391c1a1b5f0ae4d10c50bd63c\n", + " Stored in directory: /Users/a/Library/Caches/pip/wheels/19/39/2f/2d3cadc408a8804103f1c34ddd4b9f6a93497b11fa96fe738e\n", + "Successfully built fire\n", + "Installing collected packages: regex, pycryptodomex, Pillow, MarkupSafe, lxml, fsspec, fire, filelock, tiktoken, jinja2, huggingface-hub, blobfile, llama-models, llama-stack\n", + "Successfully installed MarkupSafe-3.0.2 Pillow-11.0.0 blobfile-3.0.0 filelock-3.16.1 fire-0.7.0 fsspec-2024.10.0 huggingface-hub-0.26.5 jinja2-3.1.4 llama-models-0.0.58 llama-stack-0.0.58 lxml-5.3.0 pycryptodomex-3.21.0 regex-2024.11.6 tiktoken-0.8.0\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: agentops in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.3.19)\n", + "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.32.3)\n", + "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (5.9.8)\n", + "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (23.2)\n", + "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.5.0)\n", + "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (6.0.2)\n", + "Requirement already satisfied: opentelemetry-api<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-sdk<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (8.5.0)\n", + "Requirement already satisfied: googleapis-common-protos~=1.52 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.66.0)\n", + "Requirement already satisfied: 
opentelemetry-exporter-otlp-proto-common==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-proto==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", + "Requirement already satisfied: protobuf<6.0,>=5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-proto==1.28.2->opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (5.29.1)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (4.12.2)\n", + "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", + "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", + "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", + "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api<2.0.0,>=1.22.0->agentops) (3.21.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.0.1)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Collecting fastapi\n", + " Using cached fastapi-0.115.6-py3-none-any.whl.metadata (27 kB)\n", + "Collecting starlette<0.42.0,>=0.40.0 (from fastapi)\n", + " Using cached starlette-0.41.3-py3-none-any.whl.metadata (6.0 kB)\n", + "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (2.10.3)\n", + "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (4.12.2)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", 
+ "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.7.0)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", + "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", + "Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n", + "Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n", + "Installing collected packages: starlette, fastapi\n", + "Successfully installed fastapi-0.115.6 starlette-0.41.3\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n", + "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", + "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", + "Note: you may need to restart 
the kernel to use updated packages.\n" + ] + } + ], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -30,18 +227,29 @@ "%pip install opentelemetry-sdk\n" ] }, - { - "cell_type": "markdown", - "metadata": {}, - "source": [ - "Then import them" - ] - }, { "cell_type": "code", "execution_count": 3, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" + ] + } + ], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", @@ -77,9 +285,36 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 4, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -108,9 +343,40 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 5, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n" + ] + }, + { + "ename": "InternalServerError", + "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", + "output_type": "error", + "traceback": [ + "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", + "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m 
\u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/agentops/llms/llama_stack_client.py:252\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 252\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing 
required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m 
\u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: \u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", + "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", + "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" + ] + } + ], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -137,9 +403,95 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 9, "metadata": {}, - "outputs": [], + "outputs": [ + 
{ + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=48206eed-d5d8-4979-ab6e-3577faff5ad4\u001b[0m\u001b[0m\n", + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f0f95a35-876f-478d-9542-fe3261ad3d18\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "No available shields. Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n", + "response=.agent_generator at 0x10f44b370>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0mresponse=.agent_generator at 0x10f44a3b0>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m Sem\u001b[0m\u001b[36mif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m teams\u001b[0m\u001b[36m\")\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference Semifinals 2014 teams'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference Semifinals 2014 teams\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"This would be the last Game 7 won by a road team until the 2016 NBA Finals. Game 4 of the Heat\\u2013Nets series saw LeBron James record a Heat franchise playoff high 49 points. He eventually led the Miami Heat to their fourth consecutive Eastern Conference Finals appearance with a win in Game 5. 
With a Game 5 win over the Portland Trail Blazers, the San Antonio Spurs advanced to the Western ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Playoffs Summary | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/NBA_2014.html\", \"description\": \"Checkout the Results, Statistics, Playoff Leaders, Per Game Stats, Advanced Stats and more for the 2014 NBA playoffs on Basketball-Reference.com\", \"type\": \"search_result\"}]}\u001b[0m\n", + "\u001b[35mshield_call> No Violation\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winning\u001b[0m\u001b[33m team\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m of\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m was\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m game\u001b[0m\u001b[33m was\u001b[0m\u001b[33m played\u001b[0m\u001b[33m between\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m and\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m ultimately\u001b[0m\u001b[33m advancing\u001b[0m\u001b[33m to\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m Finals\u001b[0m\u001b[33m.\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", + "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: API Key is invalid: {}.\n", + "\t Find your API key at https://app.agentops.ai/settings/projects\n", + "🖇 AgentOps: Could not end session - multiple sessions detected. 
You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" + ] + } + ], "source": [ "import os\n", "from llama_stack_client import LlamaStackClient\n", @@ -213,8 +565,8 @@ "\n", " print(f\"{response=}\")\n", "\n", - " # for log in EventLogger().log(response):\n", - " # log.print()\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", "\n", "agentops.start_session()\n", "\n", @@ -225,12 +577,32 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 8, "metadata": {}, - "outputs": [], + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 28.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n", + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 19.6s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=516a6f7f-56b5-4f04-bad6-a42d76fc7f55\u001b[0m\u001b[0m\n", + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 9.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=e6a248fb-b78c-4fd4-bffe-50a0a8065bfa\u001b[0m\u001b[0m\n" + ] + } + ], "source": [ "agentops.end_all_sessions()" ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] } ], "metadata": { @@ -249,7 +621,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.12.2" + "version": "3.10.0" } }, "nbformat": 4, diff --git a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb new file mode 100644 index 000000000..64a40fe12 --- /dev/null +++ b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb @@ -0,0 +1,398 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Llama Stack Client Examples\n", + "Use the llama_stack_client library to interact with a Llama Stack server" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.58)\n", + "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", + "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", + "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
llama-stack-client) (1.9.0)\n", + "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", + "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", + "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", + "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (24.9.0)\n", + "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", + "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", + "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", + "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", + "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", + "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (1.2.2)\n", + "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", + "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", + "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", + "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", + "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", + "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", + "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.1.3)\n", + "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", + "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", + "Requirement already satisfied: wcwidth in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", + "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", + "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", + "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", + "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", + "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", + "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", + "Note: you may need to restart the kernel to use updated packages.\n" + ] + } + ], + "source": [ + "%pip install -U llama-stack-client\n", + "%pip install -U llama-stack\n", + "%pip install -U agentops\n", + "%pip install -U python-dotenv\n", + "%pip install -U fastapi\n", + "%pip install opentelemetry-api\n", + "%pip install opentelemetry-sdk\n" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" + ] + } + ], + "source": [ + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.inference.event_logger import EventLogger\n", + "from llama_stack_client.types import UserMessage\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from dotenv import load_dotenv\n", + "import os\n", + "import agentops\n", + "\n", + "load_dotenv()\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"\n", + "\n", + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"llama-stack-client-example\"], auto_start_session=False)\n", + "\n", + "host = \"0.0.0.0\" # LLAMA_STACK_HOST\n", + "port = 5001 # LLAMA_STACK_PORT\n", + "\n", + "full_host = f\"http://{host}:{port}\"\n", + "\n", + "client = LlamaStackClient(\n", + " base_url=f\"{full_host}\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Inference Canary + Agent Canary" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" + ] + }, + { + "name": "stdout", + "output_type": "stream", + "text": [ + "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n", + "No available shields. 
Disable safety.\n", + "Using model: meta-llama/Llama-3.2-1B-Instruct\n", + "response=.agent_generator at 0x1240c3990>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0mresponse=.agent_generator at 0x1240c3840>\n", + "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m semif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m winners\u001b[0m\u001b[36m\")\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference semifinals 2014 winners'}\u001b[0m\n", + "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference semifinals 2014 winners\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"The 2014 NBA playoffs was the postseason tournament of the National Basketball Association's 2013\\u201314 season. The tournament concluded with the Western Conference champion San Antonio Spurs defeating the two-time defending NBA champion and Eastern Conference champion Miami Heat 4 games to ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Finals | Basketball Wiki | Fandom\", \"url\": \"https://basketball.fandom.com/wiki/2014_NBA_Finals\", \"description\": \"The 2014 NBA Finals was the championship series of the 2013\\u201314 season of the National Basketball Association (NBA) and the conclusion of the season's playoffs. The Western Conference champion San Antonio Spurs defeated the two-time defending NBA champion and Eastern Conference champion Miami ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. 
Spurs\", \"type\": \"search_result\"}]}\u001b[0m\n", + "\u001b[35mshield_call> No Violation\u001b[0m\n", + "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winners\u001b[0m\u001b[33m of\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m in\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m were\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m who\u001b[0m\u001b[33m defeated\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m \u001b[0m\u001b[33m4\u001b[0m\u001b[33m games\u001b[0m\u001b[33m to\u001b[0m\u001b[33m \u001b[0m\u001b[33m0\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", + "\u001b[30m\u001b[0m" + ] + }, + { + "name": "stderr", + "output_type": "stream", + "text": [ + "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 4 | \u001b[1mTools:\u001b[0m 1 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", + "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" + ] + } + ], + "source": [ + "### Inference Canary\n", + "\n", + "agentops.start_session() # AgentOps start session\n", + "\n", + "response = client.inference.chat_completion(\n", + " messages=[\n", + " UserMessage(\n", + " content=\"hello world, write me a 3 word poem about the moon\",\n", + " role=\"user\",\n", + " ),\n", + " ],\n", + " model_id=\"meta-llama/Llama-3.2-1B-Instruct\",\n", + " stream=True\n", + ")\n", + "\n", + "async for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "\n", + "### Agent Canary\n", + "\n", + "import os\n", + "from llama_stack_client import LlamaStackClient\n", + "from llama_stack_client.lib.agents.agent import Agent\n", + "from llama_stack_client.lib.agents.event_logger import EventLogger\n", + "from llama_stack_client.types.agent_create_params import AgentConfig\n", + "\n", + "LLAMA_STACK_PORT = 5001\n", + "\n", + "# Replace with actual API keys for functionality\n", + "BRAVE_SEARCH_API_KEY = os.getenv(\"BRAVE_SEARCH_API_KEY\") or \"your-brave-search-api-key\"\n", + "\n", + "async def agent_test():\n", + " client = LlamaStackClient(\n", + " base_url=f\"http://0.0.0.0:{LLAMA_STACK_PORT}\",\n", + " )\n", + "\n", + " available_shields = [shield.identifier for shield in client.shields.list()]\n", + " if not available_shields:\n", + " print(\"No available shields. Disable safety.\")\n", + " else:\n", + " print(f\"Available shields found: {available_shields}\")\n", + " available_models = [model.identifier for model in client.models.list()]\n", + " if not available_models:\n", + " raise ValueError(\"No available models\")\n", + " else:\n", + " selected_model = available_models[0]\n", + " print(f\"Using model: {selected_model}\")\n", + "\n", + " agent_config = AgentConfig(\n", + " model=selected_model,\n", + " instructions=\"You are a helpful assistant. 
Just say hello as a greeting.\",\n", + " sampling_params={\n", + " \"strategy\": \"greedy\",\n", + " \"temperature\": 1.0,\n", + " \"top_p\": 0.9,\n", + " },\n", + " tools=[\n", + " {\n", + " \"type\": \"brave_search\",\n", + " \"engine\": \"brave\",\n", + " \"api_key\": BRAVE_SEARCH_API_KEY,\n", + " }\n", + " ],\n", + " tool_choice=\"auto\",\n", + " tool_prompt_format=\"json\",\n", + " input_shields=available_shields if available_shields else [],\n", + " output_shields=available_shields if available_shields else [],\n", + " enable_session_persistence=False,\n", + " )\n", + " agent = Agent(client, agent_config)\n", + " user_prompts = [\n", + " \"Hello\",\n", + " \"Which players played in the winning team of the NBA western conference semifinals of 2014, please use tools\",\n", + " ]\n", + "\n", + " session_id = agent.create_session(\"test-session\")\n", + "\n", + " for prompt in user_prompts:\n", + " response = agent.create_turn(\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": prompt,\n", + " }\n", + " ],\n", + " session_id=session_id,\n", + " )\n", + "\n", + " print(f\"{response=}\")\n", + "\n", + " for log in EventLogger().log(response):\n", + " log.print()\n", + "\n", + "await agent_test()\n", + "\n", + "agentops.end_session(\"Success\") # AgentOps end session" + ] + }, + { + "cell_type": "code", + "execution_count": 4, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_all_sessions()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + } + ], + "metadata": { + "kernelspec": { + "display_name": "venv", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.10.0" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index ab6399cbc..8095a3c82 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -1,27 +1,28 @@ import asyncio import os +from dotenv import load_dotenv from llama_stack_client import LlamaStackClient from llama_stack_client.lib.agents.agent import Agent from llama_stack_client.lib.agents.event_logger import EventLogger -from llama_stack_client.types import Attachment from llama_stack_client.types.agent_create_params import AgentConfig -import agentops - -LLAMA_STACK_PORT = 5001 -INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" +load_dotenv() # import debugpy # debugpy.listen(5678) # debugpy.wait_for_client() -agentops.init(default_tags=["llama-stack-client-example"], auto_start_session=False) +import agentops # type: ignore +agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) +LLAMA_STACK_HOST = "0.0.0.0" +LLAMA_STACK_PORT = 5001 +INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" async def agent_test(): client = LlamaStackClient( - base_url=f"http://localhost:{LLAMA_STACK_PORT}", + base_url=f"http://{LLAMA_STACK_HOST}:{LLAMA_STACK_PORT}", ) available_shields = [shield.identifier for shield in client.shields.list()] @@ -76,12 +77,9 @@ async def agent_test(): session_id=session_id, ) - 
print("Response: ", response) - - # for log in EventLogger().log(response): - # log.print() - + for log in EventLogger().log(response): + log.print() agentops.start_session() asyncio.run(agent_test()) -agentops.end_session("Success") +agentops.end_session(end_state="Success") \ No newline at end of file From 35ce1db343e983e609c6a97c0ba70e372a56e8c0 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Sun, 8 Dec 2024 17:38:25 +0530 Subject: [PATCH 65/69] clean notebook and remove commented code --- .../llama_stack_example.ipynb | 403 +----------------- .../llama_stack_example_for_ci.ipynb | 203 +-------- .../llama_stack_client_canary/agent_canary.py | 4 - 3 files changed, 18 insertions(+), 592 deletions(-) diff --git a/examples/llama_stack_client_examples/llama_stack_example.ipynb b/examples/llama_stack_client_examples/llama_stack_example.ipynb index 621b692c0..42297557c 100644 --- a/examples/llama_stack_client_examples/llama_stack_example.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_example.ipynb @@ -17,206 +17,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.57)\n", - "Collecting llama-stack-client\n", - " Downloading llama_stack_client-0.0.58-py3-none-any.whl.metadata (15 kB)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", - "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
(3.10)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", - "Using cached fastapi-0.115.6-py3-none-any.whl (94 kB)\n", - "Using cached starlette-0.41.3-py3-none-any.whl (73 kB)\n", - "Installing collected packages: starlette, fastapi\n", - "Successfully installed fastapi-0.115.6 starlette-0.41.3\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -229,27 +32,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at 
https://app.agentops.ai/settings/projects\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" - ] - } - ], + "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", @@ -285,36 +70,9 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 1 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=ceea2686-c0ed-4190-b106-eeae88ffe5ca\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -343,40 +101,9 @@ }, { "cell_type": "code", - "execution_count": 5, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n" - ] - }, - { - "ename": "InternalServerError", - "evalue": "Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}", - "output_type": "error", - "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mInternalServerError\u001b[0m Traceback (most recent call last)", - "Cell \u001b[0;32mIn[5], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m agentops\u001b[38;5;241m.\u001b[39mstart_session()\n\u001b[0;32m----> 2\u001b[0m response \u001b[38;5;241m=\u001b[39m \u001b[43mclient\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43minference\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mchat_completion\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 3\u001b[0m \u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m[\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43mUserMessage\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mcontent\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwrite me a 3 word poem about the moon\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m 
\u001b[49m\u001b[43mrole\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43muser\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmeta-llama/Llama-3.2-1B-Instruct\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\n\u001b[1;32m 11\u001b[0m \u001b[43m)\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[38;5;28mprint\u001b[39m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m> Response: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mresponse\u001b[38;5;241m.\u001b[39mcompletion_message\u001b[38;5;241m.\u001b[39mcontent\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 14\u001b[0m agentops\u001b[38;5;241m.\u001b[39mend_session(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mSuccess\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/agentops/llms/llama_stack_client.py:252\u001b[0m, in \u001b[0;36mLlamaStackClientProvider._override_complete..patched_function\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 250\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01min\u001b[39;00m kwargs\u001b[38;5;241m.\u001b[39mkeys():\n\u001b[1;32m 251\u001b[0m \u001b[38;5;28;01mdel\u001b[39;00m kwargs[\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124msession\u001b[39m\u001b[38;5;124m\"\u001b[39m]\n\u001b[0;32m--> 252\u001b[0m result \u001b[38;5;241m=\u001b[39m \u001b[43moriginal_complete\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 253\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mhandle_response(result, kwargs, init_timestamp, session\u001b[38;5;241m=\u001b[39msession)\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_utils/_utils.py:275\u001b[0m, in \u001b[0;36mrequired_args..inner..wrapper\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m 273\u001b[0m msg \u001b[38;5;241m=\u001b[39m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mMissing required argument: \u001b[39m\u001b[38;5;132;01m{\u001b[39;00mquote(missing[\u001b[38;5;241m0\u001b[39m])\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 274\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mTypeError\u001b[39;00m(msg)\n\u001b[0;32m--> 275\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[43mfunc\u001b[49m\u001b[43m(\u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43margs\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[38;5;241;43m*\u001b[39;49m\u001b[43mkwargs\u001b[49m\u001b[43m)\u001b[49m\n", - "File 
\u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/resources/inference.py:217\u001b[0m, in \u001b[0;36mInferenceResource.chat_completion\u001b[0;34m(self, messages, model_id, logprobs, response_format, sampling_params, stream, tool_choice, tool_prompt_format, tools, x_llama_stack_provider_data, extra_headers, extra_query, extra_body, timeout)\u001b[0m\n\u001b[1;32m 210\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mAccept\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtext/event-stream\u001b[39m\u001b[38;5;124m\"\u001b[39m, \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {})}\n\u001b[1;32m 211\u001b[0m extra_headers \u001b[38;5;241m=\u001b[39m {\n\u001b[1;32m 212\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39mstrip_not_given({\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mX-LlamaStack-ProviderData\u001b[39m\u001b[38;5;124m\"\u001b[39m: x_llama_stack_provider_data}),\n\u001b[1;32m 213\u001b[0m \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39m(extra_headers \u001b[38;5;129;01mor\u001b[39;00m {}),\n\u001b[1;32m 214\u001b[0m }\n\u001b[1;32m 215\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(\n\u001b[1;32m 216\u001b[0m InferenceChatCompletionResponse,\n\u001b[0;32m--> 217\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_post\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 218\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43m/alpha/inference/chat-completion\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 219\u001b[0m \u001b[43m \u001b[49m\u001b[43mbody\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmaybe_transform\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 220\u001b[0m \u001b[43m \u001b[49m\u001b[43m{\u001b[49m\n\u001b[1;32m 221\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmessages\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmessages\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 222\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mmodel_id\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mmodel_id\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 223\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mlogprobs\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mlogprobs\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 224\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mresponse_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mresponse_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 225\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43msampling_params\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43msampling_params\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 226\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mstream\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 227\u001b[0m \u001b[43m 
\u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_choice\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_choice\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 228\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtool_prompt_format\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtool_prompt_format\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 229\u001b[0m \u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mtools\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m:\u001b[49m\u001b[43m \u001b[49m\u001b[43mtools\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 230\u001b[0m \u001b[43m \u001b[49m\u001b[43m}\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 231\u001b[0m \u001b[43m \u001b[49m\u001b[43minference_chat_completion_params\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mInferenceChatCompletionParams\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 232\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 233\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mmake_request_options\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 234\u001b[0m \u001b[43m \u001b[49m\u001b[43mextra_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_headers\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_query\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_query\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mextra_body\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mextra_body\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mtimeout\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mtimeout\u001b[49m\n\u001b[1;32m 235\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 236\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 237\u001b[0m \u001b[43m \u001b[49m\u001b[43mAny\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\n\u001b[1;32m 238\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;66;43;03m# Union types cannot be passed in as arguments in the type system\u001b[39;49;00m\n\u001b[1;32m 239\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;129;43;01mor\u001b[39;49;00m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mFalse\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 240\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mStream\u001b[49m\u001b[43m[\u001b[49m\u001b[43mInferenceChatCompletionResponse\u001b[49m\u001b[43m]\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 241\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m,\n\u001b[1;32m 242\u001b[0m )\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1263\u001b[0m, in \u001b[0;36mSyncAPIClient.post\u001b[0;34m(self, path, cast_to, body, options, files, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1249\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mpost\u001b[39m(\n\u001b[1;32m 1250\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 1251\u001b[0m path: 
\u001b[38;5;28mstr\u001b[39m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1258\u001b[0m stream_cls: \u001b[38;5;28mtype\u001b[39m[_StreamT] \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[1;32m 1259\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m ResponseT \u001b[38;5;241m|\u001b[39m _StreamT:\n\u001b[1;32m 1260\u001b[0m opts \u001b[38;5;241m=\u001b[39m FinalRequestOptions\u001b[38;5;241m.\u001b[39mconstruct(\n\u001b[1;32m 1261\u001b[0m method\u001b[38;5;241m=\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpost\u001b[39m\u001b[38;5;124m\"\u001b[39m, url\u001b[38;5;241m=\u001b[39mpath, json_data\u001b[38;5;241m=\u001b[39mbody, files\u001b[38;5;241m=\u001b[39mto_httpx_files(files), \u001b[38;5;241m*\u001b[39m\u001b[38;5;241m*\u001b[39moptions\n\u001b[1;32m 1262\u001b[0m )\n\u001b[0;32m-> 1263\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cast(ResponseT, \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mrequest\u001b[49m\u001b[43m(\u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mopts\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m)\u001b[49m)\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:955\u001b[0m, in \u001b[0;36mSyncAPIClient.request\u001b[0;34m(self, cast_to, options, remaining_retries, stream, stream_cls)\u001b[0m\n\u001b[1;32m 952\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[1;32m 953\u001b[0m retries_taken \u001b[38;5;241m=\u001b[39m \u001b[38;5;241m0\u001b[39m\n\u001b[0;32m--> 955\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 956\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 957\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 958\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 959\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 960\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 961\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m 
err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. 
Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1043\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1041\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m remaining_retries \u001b[38;5;241m>\u001b[39m \u001b[38;5;241m0\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_should_retry(err\u001b[38;5;241m.\u001b[39mresponse):\n\u001b[1;32m 1042\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mclose()\n\u001b[0;32m-> 1043\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_retry_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1044\u001b[0m \u001b[43m \u001b[49m\u001b[43minput_options\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1045\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1046\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1047\u001b[0m \u001b[43m \u001b[49m\u001b[43mresponse_headers\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43merr\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mresponse\u001b[49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mheaders\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1048\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1049\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1050\u001b[0m \u001b[43m \u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 1052\u001b[0m \u001b[38;5;66;03m# If the response is streamed then we need to explicitly read the response\u001b[39;00m\n\u001b[1;32m 1053\u001b[0m \u001b[38;5;66;03m# to completion before attempting to access the response text.\u001b[39;00m\n\u001b[1;32m 
1054\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mis_closed:\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1092\u001b[0m, in \u001b[0;36mSyncAPIClient._retry_request\u001b[0;34m(self, options, cast_to, retries_taken, response_headers, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1088\u001b[0m \u001b[38;5;66;03m# In a synchronous context we are blocking the entire thread. Up to the library user to run the client in a\u001b[39;00m\n\u001b[1;32m 1089\u001b[0m \u001b[38;5;66;03m# different thread if necessary.\u001b[39;00m\n\u001b[1;32m 1090\u001b[0m time\u001b[38;5;241m.\u001b[39msleep(timeout)\n\u001b[0;32m-> 1092\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_request\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 1093\u001b[0m \u001b[43m \u001b[49m\u001b[43moptions\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43moptions\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1094\u001b[0m \u001b[43m \u001b[49m\u001b[43mcast_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mcast_to\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1095\u001b[0m \u001b[43m \u001b[49m\u001b[43mretries_taken\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mretries_taken\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m+\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1096\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1097\u001b[0m \u001b[43m \u001b[49m\u001b[43mstream_cls\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43mstream_cls\u001b[49m\u001b[43m,\u001b[49m\n\u001b[1;32m 1098\u001b[0m \u001b[43m\u001b[49m\u001b[43m)\u001b[49m\n", - "File \u001b[0;32m~/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages/llama_stack_client/_base_client.py:1058\u001b[0m, in \u001b[0;36mSyncAPIClient._request\u001b[0;34m(self, cast_to, options, retries_taken, stream, stream_cls)\u001b[0m\n\u001b[1;32m 1055\u001b[0m err\u001b[38;5;241m.\u001b[39mresponse\u001b[38;5;241m.\u001b[39mread()\n\u001b[1;32m 1057\u001b[0m log\u001b[38;5;241m.\u001b[39mdebug(\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mRe-raising status error\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[0;32m-> 1058\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_make_status_error_from_response(err\u001b[38;5;241m.\u001b[39mresponse) \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m\n\u001b[1;32m 1060\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_process_response(\n\u001b[1;32m 1061\u001b[0m cast_to\u001b[38;5;241m=\u001b[39mcast_to,\n\u001b[1;32m 1062\u001b[0m options\u001b[38;5;241m=\u001b[39moptions,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 1066\u001b[0m retries_taken\u001b[38;5;241m=\u001b[39mretries_taken,\n\u001b[1;32m 1067\u001b[0m )\n", - "\u001b[0;31mInternalServerError\u001b[0m: Error code: 500 - {'detail': 'Internal server error: An unexpected error occurred.'}" - ] - } - ], + "outputs": [], "source": [ "agentops.start_session()\n", "response = client.inference.chat_completion(\n", @@ -403,95 +130,9 @@ }, { "cell_type": "code", - "execution_count": 9, + "execution_count": null, "metadata": {}, - "outputs": [ - 
{ - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=48206eed-d5d8-4979-ab6e-3577faff5ad4\u001b[0m\u001b[0m\n", - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=f0f95a35-876f-478d-9542-fe3261ad3d18\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.agent_generator at 0x10f44b370>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0mresponse=.agent_generator at 0x10f44a3b0>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m Sem\u001b[0m\u001b[36mif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m teams\u001b[0m\u001b[36m\")\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n", - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[97m\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference Semifinals 2014 teams'}\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference Semifinals 2014 teams\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"This would be the last Game 7 won by a road team until the 2016 NBA Finals. Game 4 of the Heat\\u2013Nets series saw LeBron James record a Heat franchise playoff high 49 points. He eventually led the Miami Heat to their fourth consecutive Eastern Conference Finals appearance with a win in Game 5. 
With a Game 5 win over the Portland Trail Blazers, the San Antonio Spurs advanced to the Western ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Playoffs Summary | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/NBA_2014.html\", \"description\": \"Checkout the Results, Statistics, Playoff Leaders, Per Game Stats, Advanced Stats and more for the 2014 NBA playoffs on Basketball-Reference.com\", \"type\": \"search_result\"}]}\u001b[0m\n", - "\u001b[35mshield_call> No Violation\u001b[0m\n", - "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winning\u001b[0m\u001b[33m team\u001b[0m\u001b[33m in\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m of\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m was\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m.\u001b[0m\u001b[33m The\u001b[0m\u001b[33m game\u001b[0m\u001b[33m was\u001b[0m\u001b[33m played\u001b[0m\u001b[33m between\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m and\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m with\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m ultimately\u001b[0m\u001b[33m advancing\u001b[0m\u001b[33m to\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m Finals\u001b[0m\u001b[33m.\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Multiple sessions detected. You must use session.record(). More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n", - "\u001b[31;1m🖇 AgentOps: Could not record event. Start a session by calling agentops.start_session().\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: API Key is invalid: {}.\n", - "\t Find your API key at https://app.agentops.ai/settings/projects\n", - "🖇 AgentOps: Could not end session - multiple sessions detected. 
You must use session.end_session() instead of agentops.end_session() More info: https://docs.agentops.ai/v1/concepts/core-concepts#session-management\n" - ] - } - ], + "outputs": [], "source": [ "import os\n", "from llama_stack_client import LlamaStackClient\n", @@ -577,32 +218,12 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 28.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=990b495b-e4c6-4c78-97d9-21dd47101ff3\u001b[0m\u001b[0m\n", - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 19.6s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=516a6f7f-56b5-4f04-bad6-a42d76fc7f55\u001b[0m\u001b[0m\n", - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 9.8s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 0 | \u001b[1mTools:\u001b[0m 0 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=e6a248fb-b78c-4fd4-bffe-50a0a8065bfa\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "agentops.end_all_sessions()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb index 64a40fe12..7249e04ea 100644 --- a/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb +++ b/examples/llama_stack_client_examples/llama_stack_example_for_ci.ipynb @@ -17,151 +17,9 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "Requirement already satisfied: llama-stack-client in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.58)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.7.0)\n", - "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.9.0)\n", - "Requirement already satisfied: httpx<1,>=0.23.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (0.28.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.2.3)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (3.0.48)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
llama-stack-client) (24.9.0)\n", - "Requirement already satisfied: pydantic<3,>=1.9.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (2.10.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (13.9.4)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client) (4.12.2)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (1.2.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client) (3.10)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx<1,>=0.23.0->llama-stack-client) (1.0.7)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx<1,>=0.23.0->llama-stack-client) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic<3,>=1.9.0->llama-stack-client) (2.27.1)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client) (2024.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack-client) (0.2.13)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pyaml->llama-stack-client) (6.0.2)\n", - "Requirement already satisfied: markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack-client) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack-client) (2.18.0)\n", - "Requirement already 
satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack-client) (0.1.2)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client) (1.17.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: llama-stack in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.0.58)\n", - "Requirement already satisfied: blobfile in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (3.0.0)\n", - "Requirement already satisfied: fire in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.7.0)\n", - "Requirement already satisfied: httpx in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.28.0)\n", - "Requirement already satisfied: huggingface-hub in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.26.5)\n", - "Requirement already satisfied: llama-models>=0.0.58 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.0.58)\n", - "Requirement already satisfied: llama-stack-client>=0.0.58 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (0.0.58)\n", - "Requirement already satisfied: prompt-toolkit in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (3.0.48)\n", - "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (1.0.1)\n", - "Requirement already satisfied: pydantic>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.10.3)\n", - "Requirement already satisfied: requests in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.32.3)\n", - "Requirement already satisfied: rich in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (13.9.4)\n", - "Requirement already satisfied: setuptools in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (57.4.0)\n", - "Requirement already satisfied: termcolor in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack) (2.5.0)\n", - "Requirement already satisfied: PyYAML in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (6.0.2)\n", - "Requirement already satisfied: jinja2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (3.1.4)\n", - "Requirement already satisfied: tiktoken in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (0.8.0)\n", - "Requirement already satisfied: Pillow in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-models>=0.0.58->llama-stack) (11.0.0)\n", - "Requirement already satisfied: anyio<5,>=3.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.7.0)\n", - "Requirement already satisfied: click in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
llama-stack-client>=0.0.58->llama-stack) (8.1.7)\n", - "Requirement already satisfied: distro<2,>=1.7.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.9.0)\n", - "Requirement already satisfied: pandas in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (2.2.3)\n", - "Requirement already satisfied: pyaml in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (24.9.0)\n", - "Requirement already satisfied: sniffio in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (1.3.1)\n", - "Requirement already satisfied: tqdm in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.67.1)\n", - "Requirement already satisfied: typing-extensions<5,>=4.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from llama-stack-client>=0.0.58->llama-stack) (4.12.2)\n", - "Requirement already satisfied: certifi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (2024.8.30)\n", - "Requirement already satisfied: httpcore==1.* in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (1.0.7)\n", - "Requirement already satisfied: idna in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpx->llama-stack) (3.10)\n", - "Requirement already satisfied: h11<0.15,>=0.13 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from httpcore==1.*->httpx->llama-stack) (0.14.0)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic>=2->llama-stack) (2.27.1)\n", - "Requirement already satisfied: pycryptodomex>=3.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (3.21.0)\n", - "Requirement already satisfied: urllib3<3,>=1.25.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (2.2.3)\n", - "Requirement already satisfied: lxml>=4.9 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (5.3.0)\n", - "Requirement already satisfied: filelock>=3.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from blobfile->llama-stack) (3.16.1)\n", - "Requirement already satisfied: fsspec>=2023.5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from huggingface-hub->llama-stack) (2024.10.0)\n", - "Requirement already satisfied: packaging>=20.9 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from huggingface-hub->llama-stack) (23.2)\n", - "Requirement already satisfied: wcwidth in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from prompt-toolkit->llama-stack) (0.2.13)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests->llama-stack) (3.4.0)\n", - "Requirement already satisfied: 
markdown-it-py>=2.2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (3.0.0)\n", - "Requirement already satisfied: pygments<3.0.0,>=2.13.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from rich->llama-stack) (2.18.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.5.0->llama-stack-client>=0.0.58->llama-stack) (1.2.2)\n", - "Requirement already satisfied: mdurl~=0.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from markdown-it-py>=2.2.0->rich->llama-stack) (0.1.2)\n", - "Requirement already satisfied: MarkupSafe>=2.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from jinja2->llama-models>=0.0.58->llama-stack) (3.0.2)\n", - "Requirement already satisfied: numpy>=1.22.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.1.3)\n", - "Requirement already satisfied: python-dateutil>=2.8.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2.9.0.post0)\n", - "Requirement already satisfied: pytz>=2020.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", - "Requirement already satisfied: tzdata>=2022.7 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pandas->llama-stack-client>=0.0.58->llama-stack) (2024.2)\n", - "Requirement already satisfied: regex>=2022.1.18 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from tiktoken->llama-models>=0.0.58->llama-stack) (2024.11.6)\n", - "Requirement already satisfied: six>=1.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from python-dateutil>=2.8.2->pandas->llama-stack-client>=0.0.58->llama-stack) (1.17.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: agentops in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.3.19)\n", - "Requirement already satisfied: requests<3.0.0,>=2.0.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.32.3)\n", - "Requirement already satisfied: psutil==5.9.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (5.9.8)\n", - "Requirement already satisfied: packaging==23.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (23.2)\n", - "Requirement already satisfied: termcolor>=2.3.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (2.5.0)\n", - "Requirement already satisfied: PyYAML<7.0,>=5.3 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (6.0.2)\n", - "Requirement already satisfied: opentelemetry-api<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-sdk<2.0.0,>=1.22.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0 in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from agentops) (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api<2.0.0,>=1.22.0->agentops) (8.5.0)\n", - "Requirement already satisfied: googleapis-common-protos~=1.52 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.66.0)\n", - "Requirement already satisfied: opentelemetry-exporter-otlp-proto-common==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-proto==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (1.28.2)\n", - "Requirement already satisfied: protobuf<6.0,>=5.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-proto==1.28.2->opentelemetry-exporter-otlp-proto-http<2.0.0,>=1.22.0->agentops) (5.29.1)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk<2.0.0,>=1.22.0->agentops) (4.12.2)\n", - "Requirement already satisfied: charset-normalizer<4,>=2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.4.0)\n", - "Requirement already satisfied: idna<4,>=2.5 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (3.10)\n", - "Requirement already satisfied: urllib3<3,>=1.21.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2.2.3)\n", - "Requirement already satisfied: certifi>=2017.4.17 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from requests<3.0.0,>=2.0.0->agentops) (2024.8.30)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api<2.0.0,>=1.22.0->agentops) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api<2.0.0,>=1.22.0->agentops) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: python-dotenv in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.0.1)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: fastapi in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (0.115.6)\n", - "Requirement already satisfied: starlette<0.42.0,>=0.40.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from 
fastapi) (0.41.3)\n", - "Requirement already satisfied: pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (2.10.3)\n", - "Requirement already satisfied: typing-extensions>=4.8.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from fastapi) (4.12.2)\n", - "Requirement already satisfied: annotated-types>=0.6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (0.7.0)\n", - "Requirement already satisfied: pydantic-core==2.27.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from pydantic!=1.8,!=1.8.1,!=2.0.0,!=2.0.1,!=2.1.0,<3.0.0,>=1.7.4->fastapi) (2.27.1)\n", - "Requirement already satisfied: anyio<5,>=3.4.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from starlette<0.42.0,>=0.40.0->fastapi) (4.7.0)\n", - "Requirement already satisfied: exceptiongroup>=1.0.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.2.2)\n", - "Requirement already satisfied: idna>=2.8 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (3.10)\n", - "Requirement already satisfied: sniffio>=1.1 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from anyio<5,>=3.4.0->starlette<0.42.0,>=0.40.0->fastapi) (1.3.1)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-api in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n", - "Requirement already satisfied: opentelemetry-sdk in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (1.28.2)\n", - "Requirement already satisfied: opentelemetry-api==1.28.2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (1.28.2)\n", - "Requirement already satisfied: opentelemetry-semantic-conventions==0.49b2 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (0.49b2)\n", - "Requirement already satisfied: typing-extensions>=3.7.4 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-sdk) (4.12.2)\n", - "Requirement already satisfied: deprecated>=1.2.6 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (1.2.15)\n", - "Requirement already satisfied: importlib-metadata<=8.5.0,>=6.0 in 
/Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from opentelemetry-api==1.28.2->opentelemetry-sdk) (8.5.0)\n", - "Requirement already satisfied: wrapt<2,>=1.10 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from deprecated>=1.2.6->opentelemetry-api==1.28.2->opentelemetry-sdk) (1.17.0)\n", - "Requirement already satisfied: zipp>=3.20 in /Users/a/src/projects/12_2024/agentops/venv/lib/python3.10/site-packages (from importlib-metadata<=8.5.0,>=6.0->opentelemetry-api==1.28.2->opentelemetry-sdk) (3.21.0)\n", - "Note: you may need to restart the kernel to use updated packages.\n" - ] - } - ], + "outputs": [], "source": [ "%pip install -U llama-stack-client\n", "%pip install -U llama-stack\n", @@ -174,17 +32,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: WARNING: agentops is out of date. Please update with the command: 'pip install --upgrade agentops'\n" - ] - } - ], + "outputs": [], "source": [ "from llama_stack_client import LlamaStackClient\n", "from llama_stack_client import LlamaStackClient\n", @@ -220,43 +70,9 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" - ] - }, - { - "name": "stdout", - "output_type": "stream", - "text": [ - "\u001b[36mAssistant> \u001b[0m\u001b[33mSil\u001b[0m\u001b[33ment\u001b[0m\u001b[33m lunar\u001b[0m\u001b[33m glow\u001b[0m\u001b[97m\u001b[0m\n", - "No available shields. Disable safety.\n", - "Using model: meta-llama/Llama-3.2-1B-Instruct\n", - "response=.agent_generator at 0x1240c3990>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[33mHello\u001b[0m\u001b[33m!\u001b[0m\u001b[33m How\u001b[0m\u001b[33m can\u001b[0m\u001b[33m I\u001b[0m\u001b[33m assist\u001b[0m\u001b[33m you\u001b[0m\u001b[33m today\u001b[0m\u001b[33m?\u001b[0m\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0mresponse=.agent_generator at 0x1240c3840>\n", - "\u001b[30m\u001b[0m\u001b[33minference> \u001b[0m\u001b[36m\u001b[0m\u001b[36mbr\u001b[0m\u001b[36mave\u001b[0m\u001b[36m_search\u001b[0m\u001b[36m.call\u001b[0m\u001b[36m(query\u001b[0m\u001b[36m=\"\u001b[0m\u001b[36mN\u001b[0m\u001b[36mBA\u001b[0m\u001b[36m Western\u001b[0m\u001b[36m Conference\u001b[0m\u001b[36m semif\u001b[0m\u001b[36minals\u001b[0m\u001b[36m \u001b[0m\u001b[36m201\u001b[0m\u001b[36m4\u001b[0m\u001b[36m winners\u001b[0m\u001b[36m\")\u001b[0m\u001b[97m\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Args:{'query': 'NBA Western Conference semifinals 2014 winners'}\u001b[0m\n", - "\u001b[32mtool_execution> Tool:brave_search Response:{\"query\": \"NBA Western Conference semifinals 2014 winners\", \"top_k\": [{\"title\": \"2014 NBA playoffs - Wikipedia\", \"url\": \"https://en.wikipedia.org/wiki/2014_NBA_playoffs\", \"description\": \"The 2014 NBA playoffs was the postseason tournament of the National Basketball Association's 2013\\u201314 season. 
The tournament concluded with the Western Conference champion San Antonio Spurs defeating the two-time defending NBA champion and Eastern Conference champion Miami Heat 4 games to ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Finals | Basketball Wiki | Fandom\", \"url\": \"https://basketball.fandom.com/wiki/2014_NBA_Finals\", \"description\": \"The 2014 NBA Finals was the championship series of the 2013\\u201314 season of the National Basketball Association (NBA) and the conclusion of the season's playoffs. The Western Conference champion San Antonio Spurs defeated the two-time defending NBA champion and Eastern Conference champion Miami ...\", \"type\": \"search_result\"}, {\"title\": \"2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs | Basketball-Reference.com\", \"url\": \"https://www.basketball-reference.com/playoffs/2014-nba-western-conference-semifinals-trail-blazers-vs-spurs.html\", \"description\": \"Summary and statistics for the 2014 NBA Western Conference Semifinals - Trail Blazers vs. Spurs\", \"type\": \"search_result\"}]}\u001b[0m\n", - "\u001b[35mshield_call> No Violation\u001b[0m\n", - "\u001b[33minference> \u001b[0m\u001b[33mThe\u001b[0m\u001b[33m winners\u001b[0m\u001b[33m of\u001b[0m\u001b[33m the\u001b[0m\u001b[33m NBA\u001b[0m\u001b[33m Western\u001b[0m\u001b[33m Conference\u001b[0m\u001b[33m semif\u001b[0m\u001b[33minals\u001b[0m\u001b[33m in\u001b[0m\u001b[33m \u001b[0m\u001b[33m201\u001b[0m\u001b[33m4\u001b[0m\u001b[33m were\u001b[0m\u001b[33m the\u001b[0m\u001b[33m San\u001b[0m\u001b[33m Antonio\u001b[0m\u001b[33m Spurs\u001b[0m\u001b[33m,\u001b[0m\u001b[33m who\u001b[0m\u001b[33m defeated\u001b[0m\u001b[33m the\u001b[0m\u001b[33m Portland\u001b[0m\u001b[33m Trail\u001b[0m\u001b[33m Blazers\u001b[0m\u001b[33m \u001b[0m\u001b[33m4\u001b[0m\u001b[33m games\u001b[0m\u001b[33m to\u001b[0m\u001b[33m \u001b[0m\u001b[33m0\u001b[0m\u001b[33m.\u001b[0m\u001b[97m\u001b[0m\n", - "\u001b[30m\u001b[0m" - ] - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "🖇 AgentOps: Session Stats - \u001b[1mDuration:\u001b[0m 7.9s | \u001b[1mCost:\u001b[0m $0.00 | \u001b[1mLLMs:\u001b[0m 4 | \u001b[1mTools:\u001b[0m 1 | \u001b[1mActions:\u001b[0m 0 | \u001b[1mErrors:\u001b[0m 0\n", - "🖇 AgentOps: \u001b[34m\u001b[34mSession Replay: https://app.agentops.ai/drilldown?session_id=c25a8294-38d0-4b49-9ecb-8940ee264020\u001b[0m\u001b[0m\n" - ] - } - ], + "outputs": [], "source": [ "### Inference Canary\n", "\n", @@ -365,13 +181,6 @@ "source": [ "agentops.end_all_sessions()" ] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": {}, - "outputs": [], - "source": [] } ], "metadata": { diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 8095a3c82..2f5682204 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -9,10 +9,6 @@ load_dotenv() -# import debugpy -# debugpy.listen(5678) -# debugpy.wait_for_client() - import agentops # type: ignore agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) From e3d75d7eb5cd7774d2638a5e3dd02b451adfe769 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:47:02 +0530 Subject: [PATCH 66/69] deleting llama-stack test --- tests/llama_stack/test_llama_stack.py | 38 
--------------------------- 1 file changed, 38 deletions(-) delete mode 100644 tests/llama_stack/test_llama_stack.py diff --git a/tests/llama_stack/test_llama_stack.py b/tests/llama_stack/test_llama_stack.py deleted file mode 100644 index 4e5868de0..000000000 --- a/tests/llama_stack/test_llama_stack.py +++ /dev/null @@ -1,38 +0,0 @@ -from llama_stack_client import LlamaStackClient -from llama_stack_client.types import UserMessage -from llama_stack_client.lib.inference.event_logger import EventLogger -from unittest.mock import MagicMock - - -class TestLlamaStack: - def setup_method(self): - self.client = LlamaStackClient() - self.client.inference = MagicMock() - self.client.inference.chat_completion = MagicMock( - return_value=[ - { - "choices": [ - { - "message": { - "content": "Moonlight whispers softly", - "role": "assistant", - } - } - ] - } - ] - ) - - def test_llama_stack_inference(self): - self.client.inference.chat_completion.assert_not_called() - self.client.inference.chat_completion( - messages=[ - UserMessage( - content="hello world, write me a 3 word poem about the moon", - role="user", - ), - ], - model_id="meta-llama/Llama-3.2-1B-Instruct", - stream=False, - ) - self.client.inference.chat_completion.assert_called_once() From 1777fedf44323419ff09dcc270fc7f06fd8a0080 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:47:30 +0530 Subject: [PATCH 67/69] add llama stack to examples --- docs/v1/examples/examples.mdx | 3 +++ 1 file changed, 3 insertions(+) diff --git a/docs/v1/examples/examples.mdx b/docs/v1/examples/examples.mdx index df6651884..c148e6728 100644 --- a/docs/v1/examples/examples.mdx +++ b/docs/v1/examples/examples.mdx @@ -42,6 +42,9 @@ mode: "wide" Jupyter Notebook with a sample LangChain integration + + Create an agent to search the web using Brave Search and find the winner of NBA western conference semifinals 2014 + Unified interface for multiple LLM providers From 637df3ff2ccbdcca932367d2bf56a6ecfc27145b Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:49:55 +0530 Subject: [PATCH 68/69] ruff --- agentops/llms/llama_stack_client.py | 2 ++ .../llama_stack_client_canary/agent_canary.py | 7 +++++-- tests/test_host_env.py | 14 ++------------ 3 files changed, 9 insertions(+), 14 deletions(-) diff --git a/agentops/llms/llama_stack_client.py b/agentops/llms/llama_stack_client.py index be7815c3f..8379a6fef 100644 --- a/agentops/llms/llama_stack_client.py +++ b/agentops/llms/llama_stack_client.py @@ -195,6 +195,7 @@ def generator(): return generator() elif inspect.isasyncgen(response): + async def agent_generator(): async for chunk in response: handle_stream_agent(chunk) @@ -202,6 +203,7 @@ async def agent_generator(): return agent_generator() elif inspect.isgenerator(response): + def agent_generator(): for chunk in response: handle_stream_agent(chunk) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 2f5682204..2ba2f1b52 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -9,13 +9,15 @@ load_dotenv() -import agentops # type: ignore +import agentops # type: ignore + agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) LLAMA_STACK_HOST = "0.0.0.0" LLAMA_STACK_PORT = 5001 INFERENCE_MODEL = "meta-llama/Llama-3.2-1B-Instruct" + 
async def agent_test(): client = LlamaStackClient( base_url=f"http://{LLAMA_STACK_HOST}:{LLAMA_STACK_PORT}", @@ -76,6 +78,7 @@ async def agent_test(): for log in EventLogger().log(response): log.print() + agentops.start_session() asyncio.run(agent_test()) -agentops.end_session(end_state="Success") \ No newline at end of file +agentops.end_session(end_state="Success") diff --git a/tests/test_host_env.py b/tests/test_host_env.py index e6194d3ac..c22796f3f 100644 --- a/tests/test_host_env.py +++ b/tests/test_host_env.py @@ -7,18 +7,8 @@ def mock_partitions(): return [ - sdiskpart( - device="/dev/sda1", - mountpoint="/", - fstype="ext4", - opts="rw,relatime" - ), - sdiskpart( - device="z:\\", - mountpoint="z:\\", - fstype="ntfs", - opts="rw,relatime" - ), + sdiskpart(device="/dev/sda1", mountpoint="/", fstype="ext4", opts="rw,relatime"), + sdiskpart(device="z:\\", mountpoint="z:\\", fstype="ntfs", opts="rw,relatime"), ] From f9468811f00116e8935eac3df6863137581180b5 Mon Sep 17 00:00:00 2001 From: Pratyush Shukla Date: Tue, 10 Dec 2024 22:51:58 +0530 Subject: [PATCH 69/69] fix import --- .../providers/llama_stack_client_canary/agent_canary.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py index 2ba2f1b52..1060627db 100644 --- a/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py +++ b/tests/core_manual_tests/providers/llama_stack_client_canary/agent_canary.py @@ -1,3 +1,4 @@ +import agentops import asyncio import os from dotenv import load_dotenv @@ -9,8 +10,6 @@ load_dotenv() -import agentops # type: ignore - agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False) LLAMA_STACK_HOST = "0.0.0.0"
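For reference, the following is a minimal sketch (not taken from any commit above) of the workflow this patch series instruments. It assumes llama-stack-client>=0.0.53 and agentops are installed, a Llama Stack server is already listening on 0.0.0.0:5001 with meta-llama/Llama-3.2-1B-Instruct served (the same host, port, and model the canary uses), and that AGENTOPS_API_KEY is exported; the response object is printed as-is rather than parsed, since its exact shape is not asserted here.

import os

import agentops
from llama_stack_client import LlamaStackClient
from llama_stack_client.types import UserMessage

# Start the session manually so it cleanly wraps the single inference call.
agentops.init(os.getenv("AGENTOPS_API_KEY"), default_tags=["llama-stack-client-example"], auto_start_session=False)
agentops.start_session()

# Point the client at the locally running Llama Stack server (assumed address).
client = LlamaStackClient(base_url="http://0.0.0.0:5001")

# The provider added in this patch series instruments this inference call.
response = client.inference.chat_completion(
    messages=[
        UserMessage(content="hello world, write me a 3 word poem about the moon", role="user"),
    ],
    model_id="meta-llama/Llama-3.2-1B-Instruct",
    stream=False,
)
print(response)

agentops.end_session(end_state="Success")

Streaming and async-generator responses are handled by the same provider (see the generator and agent_generator branches in the ruff commit above), so the non-streaming call shown here is only the simplest path.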