diff --git a/README.md b/README.md
index 26c97a5e..80554d1f 100644
--- a/README.md
+++ b/README.md
@@ -249,7 +249,113 @@ agentops.end_session('Success')
 
-### LiteLLM
+### Anthropic ﹨
+
+Track agents built with the Anthropic Python SDK (>=0.32.0).
+
+- [AgentOps integration example](./examples/anthropic/anthropic_example.ipynb)
+- [Official Anthropic documentation](https://docs.anthropic.com/en/docs/welcome)
+
+<details>
+  <summary>Installation</summary>
+
+```bash
+pip install anthropic
+```
+
+```python python
+import os
+
+import anthropic
+import agentops
+
+# Beginning of program's code (i.e. main.py, __init__.py)
+agentops.init()
+
+client = anthropic.Anthropic(
+    # This is the default and can be omitted
+    api_key=os.environ.get("ANTHROPIC_API_KEY"),
+)
+
+message = client.messages.create(
+    max_tokens=1024,
+    messages=[
+        {
+            "role": "user",
+            "content": "Tell me a cool fact about AgentOps",
+        }
+    ],
+    model="claude-3-opus-20240229",
+)
+print(message.content)
+
+agentops.end_session('Success')
+```
+
+Streaming
+```python python
+import os
+
+import anthropic
+import agentops
+
+# Beginning of program's code (i.e. main.py, __init__.py)
+agentops.init()
+
+client = anthropic.Anthropic(
+    # This is the default and can be omitted
+    api_key=os.environ.get("ANTHROPIC_API_KEY"),
+)
+
+stream = client.messages.create(
+    max_tokens=1024,
+    model="claude-3-opus-20240229",
+    messages=[
+        {
+            "role": "user",
+            "content": "Tell me something cool about streaming agents",
+        }
+    ],
+    stream=True,
+)
+
+response = ""
+for event in stream:
+    if event.type == "content_block_delta":
+        response += event.delta.text
+    elif event.type == "message_stop":
+        print("\n")
+        print(response)
+        print("\n")
+```
+
+Async
+
+```python python
+import os
+import asyncio
+
+from anthropic import AsyncAnthropic
+
+client = AsyncAnthropic(
+    # This is the default and can be omitted
+    api_key=os.environ.get("ANTHROPIC_API_KEY"),
+)
+
+
+async def main() -> None:
+    message = await client.messages.create(
+        max_tokens=1024,
+        messages=[
+            {
+                "role": "user",
+                "content": "Tell me something interesting about async agents",
+            }
+        ],
+        model="claude-3-opus-20240229",
+    )
+    print(message.content)
+
+
+asyncio.run(main())
+```
+</details>
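+
+Async streaming is also supported. The following is a minimal sketch that combines the async client with `stream=True`; it assumes the same `ANTHROPIC_API_KEY` environment variable and `claude-3-opus-20240229` model as the examples above.
+
+```python python
+import os
+import asyncio
+
+import agentops
+import anthropic
+
+# Beginning of program's code (i.e. main.py, __init__.py)
+agentops.init()
+
+client = anthropic.AsyncAnthropic(
+    # This is the default and can be omitted
+    api_key=os.environ.get("ANTHROPIC_API_KEY"),
+)
+
+
+async def main() -> None:
+    # Awaiting create() with stream=True yields an async iterator of events
+    stream = await client.messages.create(
+        max_tokens=1024,
+        model="claude-3-opus-20240229",
+        messages=[
+            {
+                "role": "user",
+                "content": "Tell me something cool about async streaming agents",
+            }
+        ],
+        stream=True,
+    )
+
+    response = ""
+    async for event in stream:
+        if event.type == "content_block_delta":
+            response += event.delta.text
+        elif event.type == "message_stop":
+            print(response)
+
+
+asyncio.run(main())
+
+agentops.end_session('Success')
+```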
+ +### LiteLLM ๐Ÿš… AgentOps provides support for LiteLLM(>=1.3.1), allowing you to call 100+ LLMs using the same Input/Output Format. diff --git a/agentops/client.py b/agentops/client.py index 8878e806..f2fc4dea 100644 --- a/agentops/client.py +++ b/agentops/client.py @@ -19,7 +19,7 @@ from termcolor import colored from .event import Event, ErrorEvent -from .helpers import ( +from .singleton import ( conditional_singleton, ) from .session import Session, active_sessions @@ -27,7 +27,7 @@ from .log_config import logger from .meta_client import MetaClient from .config import Configuration -from .llm_tracker import LlmTracker +from .llms import LlmTracker @conditional_singleton diff --git a/agentops/event.py b/agentops/event.py index a08efcd7..70ec059c 100644 --- a/agentops/event.py +++ b/agentops/event.py @@ -70,7 +70,7 @@ class LLMEvent(Event): thread_id(UUID, optional): The unique identifier of the contextual thread that a message pertains to. prompt(str, list, optional): The message or messages that were used to prompt the LLM. Preferably in ChatML format which is more fully supported by AgentOps. prompt_tokens(int, optional): The number of tokens in the prompt message. - completion(str, object, optional): The message or returned by the LLM. Preferably in ChatML format which is more fully supported by AgentOps. + completion(str, object, optional): The message or messages returned by the LLM. Preferably in ChatML format which is more fully supported by AgentOps. completion_tokens(int, optional): The number of tokens in the completion message. model(str, optional): LLM model e.g. "gpt-4", "gpt-3.5-turbo". diff --git a/agentops/helpers.py b/agentops/helpers.py index e04f4b57..2ec13264 100644 --- a/agentops/helpers.py +++ b/agentops/helpers.py @@ -1,7 +1,6 @@ from pprint import pformat from functools import wraps from datetime import datetime, timezone -import json import inspect from typing import Union import http.client @@ -11,38 +10,6 @@ from .log_config import logger from uuid import UUID from importlib.metadata import version -import subprocess - -ao_instances = {} - - -def singleton(class_): - - def getinstance(*args, **kwargs): - if class_ not in ao_instances: - ao_instances[class_] = class_(*args, **kwargs) - return ao_instances[class_] - - return getinstance - - -def conditional_singleton(class_): - - def getinstance(*args, **kwargs): - use_singleton = kwargs.pop("use_singleton", True) - if use_singleton: - if class_ not in ao_instances: - ao_instances[class_] = class_(*args, **kwargs) - return ao_instances[class_] - else: - return class_(*args, **kwargs) - - return getinstance - - -def clear_singletons(): - global ao_instances - ao_instances = {} def get_ISO_time(): diff --git a/agentops/llm_tracker.py b/agentops/llm_tracker.py deleted file mode 100644 index adadd886..00000000 --- a/agentops/llm_tracker.py +++ /dev/null @@ -1,1057 +0,0 @@ -import functools -import inspect -import pprint -import sys -from importlib import import_module -from importlib.metadata import version -from typing import Optional - -from packaging.version import Version, parse - -from .session import Session -from .event import ActionEvent, ErrorEvent, LLMEvent -from .helpers import check_call_stack_for_agent_id, get_ISO_time -from .log_config import logger -from .event import LLMEvent, ActionEvent, ToolEvent, ErrorEvent -from .helpers import get_ISO_time, check_call_stack_for_agent_id -import inspect -from typing import Optional -import pprint -from .time_travel import ( - 
fetch_completion_override_from_time_travel_cache, - # fetch_prompt_override_from_time_travel_cache, -) - -original_func = {} -original_create = None -original_create_async = None - - -class LlmTracker: - SUPPORTED_APIS = { - "litellm": {"1.3.1": ("openai_chat_completions.completion",)}, - "openai": { - "1.0.0": ("chat.completions.create",), - "0.0.0": ( - "ChatCompletion.create", - "ChatCompletion.acreate", - ), - }, - "cohere": { - "5.4.0": ("chat", "chat_stream"), - }, - "ollama": {"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, - "groq": { - "0.9.0": ("Client.chat", "AsyncClient.chat"), - }, - } - - def __init__(self, client): - self.client = client - self.completion = "" - self.llm_event: Optional[LLMEvent] = None - - def _handle_response_v0_openai( - self, response, kwargs, init_timestamp, session: Optional[Session] = None - ): - """Handle responses for OpenAI versions v1.0.0""" - from openai import AsyncStream, Stream - from openai.resources import AsyncCompletions - from openai.types.chat import ChatCompletionChunk - - self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - self.llm_event.session_id = session.session_id - - def handle_stream_chunk(chunk: ChatCompletionChunk): - # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionChunk and accumulate the deltas from all subsequent chunks to build one full chat completion - if self.llm_event.returns == None: - self.llm_event.returns = chunk - - try: - accumulated_delta = self.llm_event.returns.choices[0].delta - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.model = chunk.model - self.llm_event.prompt = kwargs["messages"] - - # NOTE: We assume for completion only choices[0] is relevant - choice = chunk.choices[0] - - if choice.delta.content: - accumulated_delta.content += choice.delta.content - - if choice.delta.role: - accumulated_delta.role = choice.delta.role - - if choice.delta.tool_calls: - accumulated_delta.tool_calls = choice.delta.tool_calls - - if choice.delta.function_call: - accumulated_delta.function_call = choice.delta.function_call - - if choice.finish_reason: - # Streaming is done. Record LLMEvent - self.llm_event.returns.choices[0].finish_reason = ( - choice.finish_reason - ) - self.llm_event.completion = { - "role": accumulated_delta.role, - "content": accumulated_delta.content, - "function_call": accumulated_delta.function_call, - "tool_calls": accumulated_delta.tool_calls, - } - self.llm_event.end_timestamp = get_ISO_time() - - self._safe_record(session, self.llm_event) - except Exception as e: - self._safe_record( - session, ErrorEvent(trigger_event=self.llm_event, exception=e) - ) - - kwargs_str = pprint.pformat(kwargs) - chunk = pprint.pformat(chunk) - logger.warning( - f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" - f"chunk:\n {chunk}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - # if the response is a generator, decorate the generator - if isinstance(response, Stream): - - def generator(): - for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return generator() - - # For asynchronous AsyncStream - elif isinstance(response, AsyncStream): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - - # For async AsyncCompletion - elif isinstance(response, AsyncCompletions): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - - # v1.0.0+ responses are objects - try: - self.llm_event.returns = response - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.prompt = kwargs["messages"] - self.llm_event.prompt_tokens = response.usage.prompt_tokens - self.llm_event.completion = response.choices[0].message.model_dump() - self.llm_event.completion_tokens = response.usage.completion_tokens - self.llm_event.model = response.model - - self._safe_record(session, self.llm_event) - except Exception as e: - self._safe_record( - session, ErrorEvent(trigger_event=self.llm_event, exception=e) - ) - - kwargs_str = pprint.pformat(kwargs) - response = pprint.pformat(response) - logger.warning( - f"Unable to parse response for LLM call. Skipping upload to AgentOps\n" - f"response:\n {response}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - return response - - def _handle_response_cohere( - self, response, kwargs, init_timestamp, session: Optional[Session] = None - ): - """Handle responses for Cohere versions >v5.4.0""" - from cohere.types.non_streamed_chat_response import NonStreamedChatResponse - from cohere.types.streamed_chat_response import ( - StreamedChatResponse, - StreamedChatResponse_CitationGeneration, - StreamedChatResponse_SearchQueriesGeneration, - StreamedChatResponse_SearchResults, - StreamedChatResponse_StreamEnd, - StreamedChatResponse_StreamStart, - StreamedChatResponse_TextGeneration, - StreamedChatResponse_ToolCallsGeneration, - ) - - # from cohere.types.chat import ChatGenerationChunk - # NOTE: Cohere only returns one message and its role will be CHATBOT which we are coercing to "assistant" - self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - self.llm_event.session_id = session.session_id - - self.action_events = {} - - def handle_stream_chunk(chunk, session: Optional[Session] = None): - - # We take the first chunk and accumulate the deltas from all subsequent chunks to build one full chat completion - if isinstance(chunk, StreamedChatResponse_StreamStart): - self.llm_event.returns = chunk - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.model = kwargs.get("model", "command-r-plus") - self.llm_event.prompt = kwargs["message"] - self.llm_event.completion = "" - return - - try: - if isinstance(chunk, StreamedChatResponse_StreamEnd): - # StreamedChatResponse_TextGeneration = LLMEvent - self.llm_event.completion = { - "role": "assistant", - "content": chunk.response.text, - } - self.llm_event.end_timestamp = get_ISO_time() - self._safe_record(session, self.llm_event) - - # StreamedChatResponse_SearchResults = ActionEvent - search_results = chunk.response.search_results - for search_result in search_results: - query = search_result.search_query - if query.generation_id in self.action_events: - action_event = 
self.action_events[query.generation_id] - search_result_dict = search_result.dict() - del search_result_dict["search_query"] - action_event.returns = search_result_dict - action_event.end_timestamp = get_ISO_time() - - # StreamedChatResponse_CitationGeneration = ActionEvent - documents = {doc["id"]: doc for doc in chunk.response.documents} - citations = chunk.response.citations - for citation in citations: - citation_id = f"{citation.start}.{citation.end}" - if citation_id in self.action_events: - action_event = self.action_events[citation_id] - citation_dict = citation.dict() - # Replace document_ids with the actual documents - citation_dict["documents"] = [ - documents[doc_id] - for doc_id in citation_dict["document_ids"] - if doc_id in documents - ] - del citation_dict["document_ids"] - - action_event.returns = citation_dict - action_event.end_timestamp = get_ISO_time() - - for key, action_event in self.action_events.items(): - self._safe_record(session, action_event) - - elif isinstance(chunk, StreamedChatResponse_TextGeneration): - self.llm_event.completion += chunk.text - elif isinstance(chunk, StreamedChatResponse_ToolCallsGeneration): - pass - elif isinstance(chunk, StreamedChatResponse_CitationGeneration): - for citation in chunk.citations: - self.action_events[f"{citation.start}.{citation.end}"] = ( - ActionEvent( - action_type="citation", - init_timestamp=get_ISO_time(), - params=citation.text, - ) - ) - elif isinstance(chunk, StreamedChatResponse_SearchQueriesGeneration): - for query in chunk.search_queries: - self.action_events[query.generation_id] = ActionEvent( - action_type="search_query", - init_timestamp=get_ISO_time(), - params=query.text, - ) - elif isinstance(chunk, StreamedChatResponse_SearchResults): - pass - - except Exception as e: - self._safe_record( - session, ErrorEvent(trigger_event=self.llm_event, exception=e) - ) - - kwargs_str = pprint.pformat(kwargs) - chunk = pprint.pformat(chunk) - logger.warning( - f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" - f"chunk:\n {chunk}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - # NOTE: As of Cohere==5.x.x, async is not supported - # if the response is a generator, decorate the generator - if inspect.isasyncgen(response): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - - elif inspect.isgenerator(response): - - def generator(): - for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return generator() - - # TODO: we should record if they pass a chat.connectors, because it means they intended to call a tool - # Not enough to record StreamedChatResponse_ToolCallsGeneration because the tool may have not gotten called - - try: - self.llm_event.returns = response - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.prompt = [] - if response.chat_history: - role_map = {"USER": "user", "CHATBOT": "assistant", "SYSTEM": "system"} - - for i in range(len(response.chat_history) - 1): - message = response.chat_history[i] - self.llm_event.prompt.append( - { - "role": role_map.get(message.role, message.role), - "content": message.message, - } - ) - - last_message = response.chat_history[-1] - self.llm_event.completion = { - "role": role_map.get(last_message.role, last_message.role), - "content": last_message.message, - } - self.llm_event.prompt_tokens = response.meta.tokens.input_tokens - self.llm_event.completion_tokens = response.meta.tokens.output_tokens - self.llm_event.model = kwargs.get("model", "command-r-plus") - - self._safe_record(session, self.llm_event) - except Exception as e: - self._safe_record( - session, ErrorEvent(trigger_event=self.llm_event, exception=e) - ) - kwargs_str = pprint.pformat(kwargs) - response = pprint.pformat(response) - logger.warning( - f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" - f"response:\n {response}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - return response - - def _handle_response_ollama( - self, response, kwargs, init_timestamp, session: Optional[Session] = None - ) -> None: - self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - - def handle_stream_chunk(chunk: dict): - message = chunk.get("message", {"role": None, "content": ""}) - - if chunk.get("done"): - self.llm_event.completion["content"] += message.get("content") - self.llm_event.end_timestamp = get_ISO_time() - self.llm_event.model = f'ollama/{chunk.get("model")}' - self.llm_event.returns = chunk - self.llm_event.returns["message"] = self.llm_event.completion - self.llm_event.prompt = kwargs["messages"] - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.client.record(self.llm_event) - - if self.llm_event.completion is None: - self.llm_event.completion = message - else: - self.llm_event.completion["content"] += message.get("content") - - if inspect.isgenerator(response): - - def generator(): - for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return generator() - - self.llm_event.end_timestamp = get_ISO_time() - - self.llm_event.model = f'ollama/{response["model"]}' - self.llm_event.returns = response - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.prompt = kwargs["messages"] - self.llm_event.completion = response["message"] - - self._safe_record(session, self.llm_event) - return response - - def _handle_response_groq( - self, response, kwargs, init_timestamp, session: Optional[Session] = None - ): - """Handle responses for OpenAI versions >v1.0.0""" - from groq import AsyncStream, Stream - from groq.resources.chat import AsyncCompletions - from groq.types.chat import ChatCompletionChunk - - self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) - if session is not None: - self.llm_event.session_id = session.session_id - - def handle_stream_chunk(chunk: ChatCompletionChunk): - # NOTE: prompt/completion usage not returned in response when streaming - # We take the first ChatCompletionChunk and accumulate the deltas from all subsequent chunks to build one full chat completion - if self.llm_event.returns == None: - self.llm_event.returns = chunk - - try: - accumulated_delta = self.llm_event.returns.choices[0].delta - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.model = chunk.model - self.llm_event.prompt = kwargs["messages"] - - # NOTE: We assume for completion only choices[0] is relevant - choice = chunk.choices[0] - - if choice.delta.content: - accumulated_delta.content += choice.delta.content - - if choice.delta.role: - accumulated_delta.role = choice.delta.role - - if choice.delta.tool_calls: - accumulated_delta.tool_calls = choice.delta.tool_calls - - if choice.delta.function_call: - accumulated_delta.function_call = choice.delta.function_call - - if choice.finish_reason: - # Streaming is done. 
Record LLMEvent - self.llm_event.returns.choices[0].finish_reason = ( - choice.finish_reason - ) - self.llm_event.completion = { - "role": accumulated_delta.role, - "content": accumulated_delta.content, - "function_call": accumulated_delta.function_call, - "tool_calls": accumulated_delta.tool_calls, - } - self.llm_event.end_timestamp = get_ISO_time() - - self._safe_record(session, self.llm_event) - except Exception as e: - self._safe_record( - session, ErrorEvent(trigger_event=self.llm_event, exception=e) - ) - - kwargs_str = pprint.pformat(kwargs) - chunk = pprint.pformat(chunk) - logger.warning( - f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" - f"chunk:\n {chunk}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - # if the response is a generator, decorate the generator - if isinstance(response, Stream): - - def generator(): - for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return generator() - - # For asynchronous AsyncStream - elif isinstance(response, AsyncStream): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - - # For async AsyncCompletion - elif isinstance(response, AsyncCompletions): - - async def async_generator(): - async for chunk in response: - handle_stream_chunk(chunk) - yield chunk - - return async_generator() - - # v1.0.0+ responses are objects - try: - self.llm_event.returns = response.model_dump() - self.llm_event.agent_id = check_call_stack_for_agent_id() - self.llm_event.prompt = kwargs["messages"] - self.llm_event.prompt_tokens = response.usage.prompt_tokens - self.llm_event.completion = response.choices[0].message.model_dump() - self.llm_event.completion_tokens = response.usage.completion_tokens - self.llm_event.model = response.model - - self._safe_record(session, self.llm_event) - except Exception as e: - self._safe_record( - session, ErrorEvent(trigger_event=self.llm_event, exception=e) - ) - - kwargs_str = pprint.pformat(kwargs) - response = pprint.pformat(response) - logger.warning( - f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" - f"response:\n {response}\n" - f"kwargs:\n {kwargs_str}\n" - ) - - return response - - def override_openai_v1_completion(self): - from openai.resources.chat import completions - from openai.types.chat import ChatCompletion, ChatCompletionChunk - - # Store the original method - global original_create - original_create = completions.Completions.create - - def patched_function(*args, **kwargs): - init_timestamp = get_ISO_time() - session = kwargs.get("session", None) - if "session" in kwargs.keys(): - del kwargs["session"] - - completion_override = fetch_completion_override_from_time_travel_cache( - kwargs - ) - if completion_override: - result_model = None - pydantic_models = (ChatCompletion, ChatCompletionChunk) - for pydantic_model in pydantic_models: - try: - result_model = pydantic_model.model_validate_json( - completion_override - ) - break - except Exception as e: - pass - - if result_model is None: - logger.error( - f"Time Travel: Pydantic validation failed for {pydantic_models} \n" - f"Time Travel: Completion override was:\n" - f"{pprint.pformat(completion_override)}" - ) - return None - return self._handle_response_v1_openai( - result_model, kwargs, init_timestamp, session=session - ) - - # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) - # if prompt_override: - # kwargs["messages"] = prompt_override["messages"] - - # Call the original function with its original arguments - result = original_create(*args, **kwargs) - return self._handle_response_v1_openai( - result, kwargs, init_timestamp, session=session - ) - - # Override the original method with the patched one - completions.Completions.create = patched_function - - def override_openai_v1_async_completion(self): - from openai.resources.chat import completions - from openai.types.chat import ChatCompletion, ChatCompletionChunk - - # Store the original method - global original_create_async - original_create_async = completions.AsyncCompletions.create - - async def patched_function(*args, **kwargs): - - init_timestamp = get_ISO_time() - - session = kwargs.get("session", None) - if "session" in kwargs.keys(): - del kwargs["session"] - - completion_override = fetch_completion_override_from_time_travel_cache( - kwargs - ) - if completion_override: - result_model = None - pydantic_models = (ChatCompletion, ChatCompletionChunk) - for pydantic_model in pydantic_models: - try: - result_model = pydantic_model.model_validate_json( - completion_override - ) - break - except Exception as e: - pass - - if result_model is None: - logger.error( - f"Time Travel: Pydantic validation failed for {pydantic_models} \n" - f"Time Travel: Completion override was:\n" - f"{pprint.pformat(completion_override)}" - ) - return None - return self._handle_response_v1_openai( - result_model, kwargs, init_timestamp, session=session - ) - - # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) - # if prompt_override: - # kwargs["messages"] = prompt_override["messages"] - - # Call the original function with its original arguments - result = await original_create_async(*args, **kwargs) - return self._handle_response_v1_openai( - result, kwargs, init_timestamp, session=session - ) - - # Override the original method with the patched one - completions.AsyncCompletions.create = patched_function - - def override_litellm_completion(self): - import litellm - from openai.types.chat import ( - ChatCompletion, - ) # Note: litellm calls all LLM APIs using the OpenAI format - - original_create = 
litellm.completion - - def patched_function(*args, **kwargs): - init_timestamp = get_ISO_time() - - session = kwargs.get("session", None) - if "session" in kwargs.keys(): - del kwargs["session"] - - completion_override = fetch_completion_override_from_time_travel_cache( - kwargs - ) - if completion_override: - result_model = ChatCompletion.model_validate_json(completion_override) - return self._handle_response_v1_openai( - result_model, kwargs, init_timestamp, session=session - ) - - # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) - # if prompt_override: - # kwargs["messages"] = prompt_override["messages"] - - # Call the original function with its original arguments - result = original_create(*args, **kwargs) - return self._handle_response_v1_openai( - result, kwargs, init_timestamp, session=session - ) - - litellm.completion = patched_function - - def override_litellm_async_completion(self): - import litellm - from openai.types.chat import ( - ChatCompletion, - ) # Note: litellm calls all LLM APIs using the OpenAI format - - original_create_async = litellm.acompletion - - async def patched_function(*args, **kwargs): - init_timestamp = get_ISO_time() - - session = kwargs.get("session", None) - if "session" in kwargs.keys(): - del kwargs["session"] - - completion_override = fetch_completion_override_from_time_travel_cache( - kwargs - ) - if completion_override: - result_model = ChatCompletion.model_validate_json(completion_override) - return self._handle_response_v1_openai( - result_model, kwargs, init_timestamp, session=session - ) - - # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) - # if prompt_override: - # kwargs["messages"] = prompt_override["messages"] - - # Call the original function with its original arguments - result = await original_create_async(*args, **kwargs) - return self._handle_response_v1_openai( - result, kwargs, init_timestamp, session=session - ) - - # Override the original method with the patched one - litellm.acompletion = patched_function - - def override_cohere_chat(self): - import cohere - import cohere.types - - original_chat = cohere.Client.chat - - def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - session = kwargs.get("session", None) - if "session" in kwargs.keys(): - del kwargs["session"] - result = original_chat(*args, **kwargs) - return self._handle_response_cohere( - result, kwargs, init_timestamp, session=session - ) - - # Override the original method with the patched one - cohere.Client.chat = patched_function - - def override_cohere_chat_stream(self): - import cohere - - original_chat = cohere.Client.chat_stream - - def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - result = original_chat(*args, **kwargs) - return self._handle_response_cohere(result, kwargs, init_timestamp) - - # Override the original method with the patched one - cohere.Client.chat_stream = patched_function - - def override_ollama_chat(self): - import ollama - - original_func["ollama.chat"] = ollama.chat - - def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - result = original_func["ollama.chat"](*args, **kwargs) - return self._handle_response_ollama( - result, kwargs, init_timestamp, session=kwargs.get("session", None) - ) - - # Override the original method with the patched one - ollama.chat 
= patched_function - - def override_ollama_chat_client(self): - from ollama import Client - - original_func["ollama.Client.chat"] = Client.chat - - def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - result = original_func["ollama.Client.chat"](*args, **kwargs) - return self._handle_response_ollama(result, kwargs, init_timestamp) - - # Override the original method with the patched one - Client.chat = patched_function - - def override_ollama_chat_async_client(self): - from ollama import AsyncClient - - original_func["ollama.AsyncClient.chat"] = AsyncClient.chat - - async def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - result = await original_func["ollama.AsyncClient.chat"](*args, **kwargs) - return self._handle_response_ollama(result, kwargs, init_timestamp) - - # Override the original method with the patched one - AsyncClient.chat = patched_function - - def override_groq_chat(self): - from groq.resources.chat import completions - - original_create = completions.Completions.create - - def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - session = kwargs.get("session", None) - if "session" in kwargs.keys(): - del kwargs["session"] - result = original_create(*args, **kwargs) - return self._handle_response_groq( - result, kwargs, init_timestamp, session=session - ) - - # Override the original method with the patched one - completions.Completions.create = patched_function - - def override_groq_chat_stream(self): - from groq.resources.chat import completions - - original_create = completions.AsyncCompletions.create - - def patched_function(*args, **kwargs): - # Call the original function with its original arguments - init_timestamp = get_ISO_time() - result = original_create(*args, **kwargs) - return self._handle_response_groq(result, kwargs, init_timestamp) - - # Override the original method with the patched one - completions.AsyncCompletions.create = patched_function - - def _override_method(self, api, method_path, module): - def handle_response(result, kwargs, init_timestamp): - if api == "openai": - return self._handle_response_v0_openai(result, kwargs, init_timestamp) - return result - - def wrap_method(original_method): - if inspect.iscoroutinefunction(original_method): - - @functools.wraps(original_method) - async def async_method(*args, **kwargs): - init_timestamp = get_ISO_time() - response = await original_method(*args, **kwargs) - return handle_response(response, kwargs, init_timestamp) - - return async_method - - else: - - @functools.wraps(original_method) - def sync_method(*args, **kwargs): - init_timestamp = get_ISO_time() - response = original_method(*args, **kwargs) - return handle_response(response, kwargs, init_timestamp) - - return sync_method - - method_parts = method_path.split(".") - original_method = functools.reduce(getattr, method_parts, module) - new_method = wrap_method(original_method) - - if len(method_parts) == 1: - setattr(module, method_parts[0], new_method) - else: - parent = functools.reduce(getattr, method_parts[:-1], module) - setattr(parent, method_parts[-1], new_method) - - def override_api(self): - """ - Overrides key methods of the specified API to record events. 
- """ - - for api in self.SUPPORTED_APIS: - if api in sys.modules: - module = import_module(api) - if api == "litellm": - module_version = version(api) - if module_version is None: - logger.warning( - f"Cannot determine LiteLLM version. Only LiteLLM>=1.3.1 supported." - ) - - if Version(module_version) >= parse("1.3.1"): - self.override_litellm_completion() - self.override_litellm_async_completion() - else: - logger.warning( - f"Only LiteLLM>=1.3.1 supported. v{module_version} found." - ) - return # If using an abstraction like litellm, do not patch the underlying LLM APIs - - if api == "openai": - # Patch openai v1.0.0+ methods - if hasattr(module, "__version__"): - module_version = parse(module.__version__) - if module_version >= parse("1.0.0"): - self.override_openai_v1_completion() - self.override_openai_v1_async_completion() - else: - # Patch openai =5.4.0 supported." - ) - - if Version(module_version) >= parse("5.4.0"): - self.override_cohere_chat() - self.override_cohere_chat_stream() - else: - logger.warning( - f"Only Cohere>=5.4.0 supported. v{module_version} found." - ) - - if api == "ollama": - module_version = version(api) - - if Version(module_version) >= parse("0.0.1"): - self.override_ollama_chat() - self.override_ollama_chat_client() - self.override_ollama_chat_async_client() - else: - logger.warning( - f"Only Ollama>=0.0.1 supported. v{module_version} found." - ) - - if api == "groq": - module_version = version(api) - - if Version(module_version) >= parse("0.9.0"): - self.override_groq_chat() - self.override_groq_chat_stream() - else: - logger.warning( - f"Only Groq>=0.9.0 supported. v{module_version} found." - ) - - def stop_instrumenting(self): - self.undo_override_openai_v1_async_completion() - self.undo_override_openai_v1_completion() - self.undo_override_ollama() - - def undo_override_openai_v1_completion(self): - global original_create - from openai.resources.chat import completions - - completions.Completions.create = original_create - - def undo_override_openai_v1_async_completion(self): - global original_create_async - from openai.resources.chat import completions - - completions.AsyncCompletions.create = original_create_async - - def undo_override_ollama(self): - if "ollama" in sys.modules: - import ollama - - ollama.chat = original_func["ollama.chat"] - ollama.Client.chat = original_func["ollama.Client.chat"] - ollama.AsyncClient.chat = original_func["ollama.AsyncClient.chat"] - - def _safe_record(self, session, event): - if session is not None: - session.record(event) - else: - self.client.record(event) diff --git a/agentops/llms/__init__.py b/agentops/llms/__init__.py new file mode 100644 index 00000000..e3e6f7cf --- /dev/null +++ b/agentops/llms/__init__.py @@ -0,0 +1,145 @@ +import functools +import sys +from importlib import import_module +from importlib.metadata import version + +from packaging.version import Version, parse + +from ..log_config import logger + +from .cohere import CohereProvider +from .groq import GroqProvider +from .litellm import LiteLLMProvider +from .ollama import OllamaProvider +from .openai import OpenAiProvider +from .anthropic import AnthropicProvider + +original_func = {} +original_create = None +original_create_async = None + + +class LlmTracker: + SUPPORTED_APIS = { + "litellm": {"1.3.1": ("openai_chat_completions.completion",)}, + "openai": { + "1.0.0": ("chat.completions.create",), + "0.0.0": ( + "ChatCompletion.create", + "ChatCompletion.acreate", + ), + }, + "cohere": { + "5.4.0": ("chat", "chat_stream"), + }, + "ollama": 
{"0.0.1": ("chat", "Client.chat", "AsyncClient.chat")}, + "groq": { + "0.9.0": ("Client.chat", "AsyncClient.chat"), + }, + "anthropic": { + "0.32.0": ("completions.create",), + }, + } + + def __init__(self, client): + self.client = client + self.completion = "" + + def override_api(self): + """ + Overrides key methods of the specified API to record events. + """ + + for api in self.SUPPORTED_APIS: + if api in sys.modules: + module = import_module(api) + if api == "litellm": + module_version = version(api) + if module_version is None: + logger.warning( + f"Cannot determine LiteLLM version. Only LiteLLM>=1.3.1 supported." + ) + + if Version(module_version) >= parse("1.3.1"): + provider = LiteLLMProvider(self.client) + provider.override() + else: + logger.warning( + f"Only LiteLLM>=1.3.1 supported. v{module_version} found." + ) + return # If using an abstraction like litellm, do not patch the underlying LLM APIs + + if api == "openai": + # Patch openai v1.0.0+ methods + if hasattr(module, "__version__"): + module_version = parse(module.__version__) + if module_version >= parse("1.0.0"): + provider = OpenAiProvider(self.client) + provider.override() + else: + raise DeprecationWarning( + "OpenAI versions < 0.1 are no longer supported by AgentOps. Please upgrade OpenAI or " + "downgrade AgentOps to <=0.3.8." + ) + + if api == "cohere": + # Patch cohere v5.4.0+ methods + module_version = version(api) + if module_version is None: + logger.warning( + f"Cannot determine Cohere version. Only Cohere>=5.4.0 supported." + ) + + if Version(module_version) >= parse("5.4.0"): + provider = CohereProvider(self.client) + provider.override() + else: + logger.warning( + f"Only Cohere>=5.4.0 supported. v{module_version} found." + ) + + if api == "ollama": + module_version = version(api) + + if Version(module_version) >= parse("0.0.1"): + provider = OllamaProvider(self.client) + provider.override() + else: + logger.warning( + f"Only Ollama>=0.0.1 supported. v{module_version} found." + ) + + if api == "groq": + module_version = version(api) + + if Version(module_version) >= parse("0.9.0"): + provider = GroqProvider(self.client) + provider.override() + else: + logger.warning( + f"Only Groq>=0.9.0 supported. v{module_version} found." + ) + + if api == "anthropic": + module_version = version(api) + + if module_version is None: + logger.warning( + f"Cannot determine Anthropic version. Only Anthropic>=0.32.0 supported." + ) + + if Version(module_version) >= parse("0.32.0"): + provider = AnthropicProvider(self.client) + provider.override() + else: + logger.warning( + f"Only Anthropic>=0.32.0 supported. v{module_version} found." 
+ ) + + def stop_instrumenting(self): + OpenAiProvider(self.client).undo_override() + GroqProvider(self.client).undo_override() + CohereProvider(self.client).undo_override() + LiteLLMProvider(self.client).undo_override() + OllamaProvider(self.client).undo_override() + AnthropicProvider(self.client).undo_override() diff --git a/agentops/llms/anthropic.py b/agentops/llms/anthropic.py new file mode 100644 index 00000000..cbefe054 --- /dev/null +++ b/agentops/llms/anthropic.py @@ -0,0 +1,289 @@ +import pprint +from typing import Optional + +from agentops.llms.instrumented_provider import InstrumentedProvider +from agentops.time_travel import fetch_completion_override_from_time_travel_cache + +from ..event import ErrorEvent, LLMEvent, ToolEvent +from ..session import Session +from ..log_config import logger +from ..helpers import check_call_stack_for_agent_id, get_ISO_time +from ..singleton import singleton + + +@singleton +class AnthropicProvider(InstrumentedProvider): + + original_create = None + original_create_async = None + + def __init__(self, client): + super().__init__(client) + self._provider_name = "Anthropic" + self.tool_event = {} + self.tool_id = "" + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None + ): + """Handle responses for Anthropic""" + from anthropic import Stream, AsyncStream + from anthropic.resources import AsyncMessages + from anthropic.types import Message + + self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + self.llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: Message): + try: + # We take the first chunk and accumulate the deltas from all subsequent chunks to build one full chat completion + if chunk.type == "message_start": + self.llm_event.returns = chunk + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.model = kwargs["model"] + self.llm_event.prompt = kwargs["messages"] + self.llm_event.prompt_tokens = chunk.message.usage.input_tokens + self.llm_event.completion = { + "role": chunk.message.role, + "content": "", # Always returned as [] in this instance type + } + + elif chunk.type == "content_block_start": + if chunk.content_block.type == "text": + self.llm_event.completion["content"] += chunk.content_block.text + + elif chunk.content_block.type == "tool_use": + self.tool_id = chunk.content_block.id + self.tool_event[self.tool_id] = ToolEvent( + name=chunk.content_block.name, + logs={"type": chunk.content_block.type, "input": ""}, + ) + + elif chunk.type == "content_block_delta": + if chunk.delta.type == "text_delta": + self.llm_event.completion["content"] += chunk.delta.text + + elif chunk.delta.type == "input_json_delta": + self.tool_event[self.tool_id].logs[ + "input" + ] += chunk.delta.partial_json + + elif chunk.type == "content_block_stop": + pass + + elif chunk.type == "message_delta": + self.llm_event.completion_tokens = chunk.usage.output_tokens + + elif chunk.type == "message_stop": + self.llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, self.llm_event) + + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n", + ) + + # if the response is a generator, decorate the generator + if isinstance(response, Stream): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + # For asynchronous AsyncStream + if isinstance(response, AsyncStream): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # For async AsyncMessages + if isinstance(response, AsyncMessages): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # Handle object responses + try: + self.llm_event.returns = response.model_dump() + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.prompt = kwargs["messages"] + self.llm_event.prompt_tokens = response.usage.input_tokens + self.llm_event.completion = { + "role": "assistant", + "content": response.content[0].text, + } + self.llm_event.completion_tokens = response.usage.output_tokens + self.llm_event.model = response.model + self.llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def override(self): + self._override_completion() + self._override_async_completion() + + def _override_completion(self): + from anthropic.resources import messages + from anthropic.types import ( + Message, + RawContentBlockDeltaEvent, + RawContentBlockStartEvent, + RawContentBlockStopEvent, + RawMessageDeltaEvent, + RawMessageStartEvent, + RawMessageStopEvent, + ) + + # Store the original method + self.original_create = messages.Messages.create + + def patched_function(*args, **kwargs): + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + completion_override = fetch_completion_override_from_time_travel_cache( + kwargs + ) + if completion_override: + result_model = None + pydantic_models = ( + Message, + RawContentBlockDeltaEvent, + RawContentBlockStartEvent, + RawContentBlockStopEvent, + RawMessageDeltaEvent, + RawMessageStartEvent, + RawMessageStopEvent, + ) + + for pydantic_model in pydantic_models: + try: + result_model = pydantic_model.model_validate_json( + completion_override + ) + break + except Exception as e: + pass + + if result_model is None: + logger.error( + f"Time Travel: Pydantic validation failed for {pydantic_models} \n" + f"Time Travel: Completion override was:\n" + f"{pprint.pformat(completion_override)}" + ) + return None + return self.handle_response( + result_model, kwargs, init_timestamp, session=session + ) + + # Call the original function with its original arguments + result = self.original_create(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + messages.Messages.create = patched_function + + def _override_async_completion(self): + from anthropic.resources import messages + from anthropic.types import ( + Message, + RawContentBlockDeltaEvent, + RawContentBlockStartEvent, + 
RawContentBlockStopEvent, + RawMessageDeltaEvent, + RawMessageStartEvent, + RawMessageStopEvent, + ) + + # Store the original method + self.original_create_async = messages.AsyncMessages.create + + async def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + completion_override = fetch_completion_override_from_time_travel_cache( + kwargs + ) + if completion_override: + result_model = None + pydantic_models = ( + Message, + RawContentBlockDeltaEvent, + RawContentBlockStartEvent, + RawContentBlockStopEvent, + RawMessageDeltaEvent, + RawMessageStartEvent, + RawMessageStopEvent, + ) + + for pydantic_model in pydantic_models: + try: + result_model = pydantic_model.model_validate_json( + completion_override + ) + break + except Exception as e: + pass + + if result_model is None: + logger.error( + f"Time Travel: Pydantic validation failed for {pydantic_models} \n" + f"Time Travel: Completion override was:\n" + f"{pprint.pformat(completion_override)}" + ) + return None + + return self.handle_response( + result_model, kwargs, init_timestamp, session=session + ) + + result = await self.original_create_async(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + messages.AsyncMessages.create = patched_function + + def undo_override(self): + from anthropic.resources import messages + + messages.Messages.create = self.original_create + messages.AsyncMessages.create = self.original_create_async diff --git a/agentops/llms/cohere.py b/agentops/llms/cohere.py new file mode 100644 index 00000000..79f2cb04 --- /dev/null +++ b/agentops/llms/cohere.py @@ -0,0 +1,258 @@ +import inspect +import pprint +from typing import Optional + +from .instrumented_provider import InstrumentedProvider +from ..event import ActionEvent, ErrorEvent, LLMEvent +from ..session import Session +from ..log_config import logger +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from ..singleton import singleton + + +@singleton +class CohereProvider(InstrumentedProvider): + original_create = None + original_create_stream = None + original_create_async = None + + def override(self): + self._override_chat() + self._override_chat_stream() + self._override_async_chat() + + def undo_override(self): + import cohere + + cohere.Client.chat = self.original_create + cohere.Client.chat_stream = self.original_create_stream + cohere.AsyncClient.chat = self.original_create_async + + def __init__(self, client): + super().__init__(client) + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None + ): + """Handle responses for Cohere versions >v5.4.0""" + from cohere.types.streamed_chat_response import ( + StreamedChatResponse_CitationGeneration, + StreamedChatResponse_SearchQueriesGeneration, + StreamedChatResponse_SearchResults, + StreamedChatResponse_StreamEnd, + StreamedChatResponse_StreamStart, + StreamedChatResponse_TextGeneration, + StreamedChatResponse_ToolCallsGeneration, + ) + + # from cohere.types.chat import ChatGenerationChunk + # NOTE: Cohere only returns one message and its role will be CHATBOT which we are coercing to "assistant" + self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + self.llm_event.session_id = session.session_id + + self.action_events = 
{} + + def handle_stream_chunk(chunk, session: Optional[Session] = None): + + # We take the first chunk and accumulate the deltas from all subsequent chunks to build one full chat completion + if isinstance(chunk, StreamedChatResponse_StreamStart): + self.llm_event.returns = chunk + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.model = kwargs.get("model", "command-r-plus") + self.llm_event.prompt = kwargs["message"] + self.llm_event.completion = "" + return + + try: + if isinstance(chunk, StreamedChatResponse_StreamEnd): + # StreamedChatResponse_TextGeneration = LLMEvent + self.llm_event.completion = { + "role": "assistant", + "content": chunk.response.text, + } + self.llm_event.end_timestamp = get_ISO_time() + self._safe_record(session, self.llm_event) + + # StreamedChatResponse_SearchResults = ActionEvent + search_results = chunk.response.search_results + if search_results: + for search_result in search_results: + query = search_result.search_query + if query.generation_id in self.action_events: + action_event = self.action_events[query.generation_id] + search_result_dict = search_result.dict() + del search_result_dict["search_query"] + action_event.returns = search_result_dict + action_event.end_timestamp = get_ISO_time() + + # StreamedChatResponse_CitationGeneration = ActionEvent + if chunk.response.documents: + documents = {doc["id"]: doc for doc in chunk.response.documents} + citations = chunk.response.citations + for citation in citations: + citation_id = f"{citation.start}.{citation.end}" + if citation_id in self.action_events: + action_event = self.action_events[citation_id] + citation_dict = citation.dict() + # Replace document_ids with the actual documents + citation_dict["documents"] = [ + documents[doc_id] + for doc_id in citation_dict["document_ids"] + if doc_id in documents + ] + del citation_dict["document_ids"] + + action_event.returns = citation_dict + action_event.end_timestamp = get_ISO_time() + + for key, action_event in self.action_events.items(): + self._safe_record(session, action_event) + + elif isinstance(chunk, StreamedChatResponse_TextGeneration): + self.llm_event.completion += chunk.text + elif isinstance(chunk, StreamedChatResponse_ToolCallsGeneration): + pass + elif isinstance(chunk, StreamedChatResponse_CitationGeneration): + for citation in chunk.citations: + self.action_events[f"{citation.start}.{citation.end}"] = ( + ActionEvent( + action_type="citation", + init_timestamp=get_ISO_time(), + params=citation.text, + ) + ) + elif isinstance(chunk, StreamedChatResponse_SearchQueriesGeneration): + for query in chunk.search_queries: + self.action_events[query.generation_id] = ActionEvent( + action_type="search_query", + init_timestamp=get_ISO_time(), + params=query.text, + ) + elif isinstance(chunk, StreamedChatResponse_SearchResults): + pass + + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + raise e + + # NOTE: As of Cohere==5.x.x, async is not supported + # if the response is a generator, decorate the generator + if inspect.isasyncgen(response): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + elif inspect.isgenerator(response): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + # TODO: we should record if they pass a chat.connectors, because it means they intended to call a tool + # Not enough to record StreamedChatResponse_ToolCallsGeneration because the tool may have not gotten called + + try: + self.llm_event.returns = response + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.prompt = [] + if response.chat_history: + role_map = {"USER": "user", "CHATBOT": "assistant", "SYSTEM": "system"} + + for i in range(len(response.chat_history) - 1): + message = response.chat_history[i] + self.llm_event.prompt.append( + { + "role": role_map.get(message.role, message.role), + "content": message.message, + } + ) + + last_message = response.chat_history[-1] + self.llm_event.completion = { + "role": role_map.get(last_message.role, last_message.role), + "content": last_message.message, + } + self.llm_event.prompt_tokens = response.meta.tokens.input_tokens + self.llm_event.completion_tokens = response.meta.tokens.output_tokens + self.llm_event.model = kwargs.get("model", "command-r-plus") + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_chat(self): + import cohere + + self.original_create = cohere.Client.chat + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = self.original_create(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + cohere.Client.chat = patched_function + + def _override_async_chat(self): + import cohere.types + + self.original_create_async = cohere.AsyncClient.chat + + async def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = await self.original_create_async(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + cohere.AsyncClient.chat = patched_function + + def _override_chat_stream(self): + import cohere + + self.original_create_stream = cohere.Client.chat_stream + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + result = self.original_create_stream(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp) + + # Override the original method with the patched one + cohere.Client.chat_stream = patched_function diff --git a/agentops/llms/groq.py b/agentops/llms/groq.py new file mode 100644 index 00000000..54e61473 --- /dev/null +++ b/agentops/llms/groq.py @@ -0,0 +1,182 @@ +import pprint +from typing import Optional + +from .instrumented_provider import InstrumentedProvider +from ..event import ErrorEvent, LLMEvent +from ..session import Session +from ..log_config import logger +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from ..singleton import singleton + + +@singleton +class GroqProvider(InstrumentedProvider): + original_create = None + original_async_create = None + + def __init__(self, client): + super().__init__(client) + self.client = client + + def override(self): + self._override_chat() + self._override_async_chat() + + def undo_override(self): + from groq.resources.chat import completions + + completions.Completions.create = self.original_create + completions.AsyncCompletions.create = self.original_create + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None + ): + """Handle responses for OpenAI versions >v1.0.0""" + from groq import AsyncStream, Stream + from groq.resources.chat import AsyncCompletions + from groq.types.chat import ChatCompletionChunk + + self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + self.llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: ChatCompletionChunk): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionChunk and accumulate the deltas from all subsequent chunks to build one full chat completion + if self.llm_event.returns == None: + self.llm_event.returns = chunk + + try: + accumulated_delta = self.llm_event.returns.choices[0].delta + 
self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.model = chunk.model + self.llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.choices[0] + + if choice.delta.content: + accumulated_delta.content += choice.delta.content + + if choice.delta.role: + accumulated_delta.role = choice.delta.role + + if choice.delta.tool_calls: + accumulated_delta.tool_calls = choice.delta.tool_calls + + if choice.delta.function_call: + accumulated_delta.function_call = choice.delta.function_call + + if choice.finish_reason: + # Streaming is done. Record LLMEvent + self.llm_event.returns.choices[0].finish_reason = ( + choice.finish_reason + ) + self.llm_event.completion = { + "role": accumulated_delta.role, + "content": accumulated_delta.content, + "function_call": accumulated_delta.function_call, + "tool_calls": accumulated_delta.tool_calls, + } + self.llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + # if the response is a generator, decorate the generator + if isinstance(response, Stream): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + # For asynchronous AsyncStream + elif isinstance(response, AsyncStream): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # For async AsyncCompletion + elif isinstance(response, AsyncCompletions): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # v1.0.0+ responses are objects + try: + self.llm_event.returns = response.model_dump() + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.prompt = kwargs["messages"] + self.llm_event.prompt_tokens = response.usage.prompt_tokens + self.llm_event.completion = response.choices[0].message.model_dump() + self.llm_event.completion_tokens = response.usage.completion_tokens + self.llm_event.model = response.model + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_chat(self): + from groq.resources.chat import completions + + self.original_create = completions.Completions.create + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + result = self.original_create(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + completions.Completions.create = patched_function + + def _override_async_chat(self): + from groq.resources.chat import completions + + self.original_async_create = completions.AsyncCompletions.create + + async def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + result = await self.original_async_create(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp) + + # Override the original method with the patched one + completions.AsyncCompletions.create = patched_function diff --git a/agentops/llms/instrumented_provider.py b/agentops/llms/instrumented_provider.py new file mode 100644 index 00000000..f33f397a --- /dev/null +++ b/agentops/llms/instrumented_provider.py @@ -0,0 +1,38 @@ +from abc import ABC, abstractmethod +from typing import Optional + +from ..session import Session +from ..event import LLMEvent + + +class InstrumentedProvider(ABC): + _provider_name: str = "InstrumentedModel" + llm_event: Optional[LLMEvent] = None + client = None + + def __init__(self, client): + self.client = client + + @abstractmethod + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None + ) -> dict: + pass + + @abstractmethod + def override(self): + pass + + @abstractmethod + def undo_override(self): + pass + + @property + def provider_name(self): + return self._provider_name + + def _safe_record(self, session, event): + if session is not None: + session.record(event) + else: + self.client.record(event) diff --git a/agentops/llms/litellm.py b/agentops/llms/litellm.py new file mode 100644 index 00000000..39836c10 --- /dev/null +++ b/agentops/llms/litellm.py @@ -0,0 +1,241 @@ +import pprint +from typing import Optional + +from ..log_config import logger +from ..event import LLMEvent, ErrorEvent +from ..session import Session +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from agentops.llms.instrumented_provider import InstrumentedProvider +from agentops.time_travel import fetch_completion_override_from_time_travel_cache +from ..singleton import singleton + + +@singleton +class LiteLLMProvider(InstrumentedProvider): + original_create = None + original_create_async = None + original_oai_create = None + original_oai_create_async = None + + def __init__(self, client): + super().__init__(client) + + def override(self): + self._override_async_completion() + self._override_completion() + + def undo_override(self): + import litellm + from openai.resources.chat import completions + + litellm.acompletion = self.original_create_async + litellm.completion = self.original_create + + completions.Completions.create = self.original_oai_create + completions.AsyncCompletions.create = self.original_oai_create_async + + def handle_response( + self, response, kwargs, init_timestamp, session: 
Optional[Session] = None + ) -> dict: + """Handle responses for OpenAI versions >v1.0.0""" + from openai import AsyncStream, Stream + from openai.resources import AsyncCompletions + from openai.types.chat import ChatCompletionChunk + from litellm.utils import CustomStreamWrapper + + self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + self.llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: ChatCompletionChunk): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionChunk and accumulate the deltas from all subsequent chunks to build one full chat completion + if self.llm_event.returns == None: + self.llm_event.returns = chunk + + try: + accumulated_delta = self.llm_event.returns.choices[0].delta + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.model = chunk.model + self.llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.choices[0] + + if choice.delta.content: + accumulated_delta.content += choice.delta.content + + if choice.delta.role: + accumulated_delta.role = choice.delta.role + + if choice.delta.tool_calls: + accumulated_delta.tool_calls = choice.delta.tool_calls + + if choice.delta.function_call: + accumulated_delta.function_call = choice.delta.function_call + + if choice.finish_reason: + # Streaming is done. Record LLMEvent + self.llm_event.returns.choices[0].finish_reason = ( + choice.finish_reason + ) + self.llm_event.completion = { + "role": accumulated_delta.role, + "content": accumulated_delta.content, + "function_call": accumulated_delta.function_call, + "tool_calls": accumulated_delta.tool_calls, + } + self.llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. 
Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + # if the response is a generator, decorate the generator + if isinstance(response, Stream): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + # litellm uses a CustomStreamWrapper + if isinstance(response, CustomStreamWrapper): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + # For asynchronous AsyncStream + elif isinstance(response, AsyncStream): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # For async AsyncCompletion + elif isinstance(response, AsyncCompletions): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # v1.0.0+ responses are objects + try: + self.llm_event.returns = response + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.prompt = kwargs["messages"] + self.llm_event.prompt_tokens = response.usage.prompt_tokens + self.llm_event.completion = response.choices[0].message.model_dump() + self.llm_event.completion_tokens = response.usage.completion_tokens + self.llm_event.model = response.model + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def _override_completion(self): + import litellm + from openai.types.chat import ( + ChatCompletion, + ) # Note: litellm calls all LLM APIs using the OpenAI format + from openai.resources.chat import completions + + self.original_create = litellm.completion + self.original_oai_create = completions.Completions.create + + def patched_function(*args, **kwargs): + init_timestamp = get_ISO_time() + + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + completion_override = fetch_completion_override_from_time_travel_cache( + kwargs + ) + if completion_override: + result_model = ChatCompletion.model_validate_json(completion_override) + return self.handle_response( + result_model, kwargs, init_timestamp, session=session + ) + + # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) + # if prompt_override: + # kwargs["messages"] = prompt_override["messages"] + + # Call the original function with its original arguments + result = self.original_create(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + litellm.completion = patched_function + + def _override_async_completion(self): + import litellm + from openai.types.chat import ( + ChatCompletion, + ) # Note: litellm calls all LLM APIs using the OpenAI format + from openai.resources.chat import completions + + self.original_create_async = litellm.acompletion + self.original_oai_create_async = completions.AsyncCompletions.create + + async def patched_function(*args, **kwargs): + init_timestamp = get_ISO_time() + + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + completion_override = fetch_completion_override_from_time_travel_cache( + kwargs + ) + if 
completion_override: + result_model = ChatCompletion.model_validate_json(completion_override) + return self.handle_response( + result_model, kwargs, init_timestamp, session=session + ) + + # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) + # if prompt_override: + # kwargs["messages"] = prompt_override["messages"] + + # Call the original function with its original arguments + result = await self.original_create_async(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + litellm.acompletion = patched_function diff --git a/agentops/llms/ollama.py b/agentops/llms/ollama.py new file mode 100644 index 00000000..6b3a8f84 --- /dev/null +++ b/agentops/llms/ollama.py @@ -0,0 +1,120 @@ +import inspect +import sys +from typing import Optional + +from ..event import LLMEvent +from ..session import Session +from agentops.helpers import get_ISO_time, check_call_stack_for_agent_id +from .instrumented_provider import InstrumentedProvider +from ..singleton import singleton + +original_func = {} + + +@singleton +class OllamaProvider(InstrumentedProvider): + original_create = None + original_create_async = None + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None + ) -> dict: + self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + + def handle_stream_chunk(chunk: dict): + message = chunk.get("message", {"role": None, "content": ""}) + + if chunk.get("done"): + self.llm_event.completion["content"] += message.get("content") + self.llm_event.end_timestamp = get_ISO_time() + self.llm_event.model = f'ollama/{chunk.get("model")}' + self.llm_event.returns = chunk + self.llm_event.returns["message"] = self.llm_event.completion + self.llm_event.prompt = kwargs["messages"] + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.client.record(self.llm_event) + + if self.llm_event.completion is None: + self.llm_event.completion = message + else: + self.llm_event.completion["content"] += message.get("content") + + if inspect.isgenerator(response): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + self.llm_event.end_timestamp = get_ISO_time() + + self.llm_event.model = f'ollama/{response["model"]}' + self.llm_event.returns = response + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.prompt = kwargs["messages"] + self.llm_event.completion = response["message"] + + self._safe_record(session, self.llm_event) + return response + + def override(self): + self._override_chat_client() + self._override_chat() + self._override_chat_async_client() + + def undo_override(self): + if "ollama" in sys.modules: + import ollama + + ollama.chat = original_func["ollama.chat"] + ollama.Client.chat = original_func["ollama.Client.chat"] + ollama.AsyncClient.chat = original_func["ollama.AsyncClient.chat"] + + def __init__(self, client): + super().__init__(client) + + def _override_chat(self): + import ollama + + original_func["ollama.chat"] = ollama.chat + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + result = original_func["ollama.chat"](*args, **kwargs) + return self.handle_response( + result, kwargs, init_timestamp, session=kwargs.get("session", None) + ) + + # Override the original method with the patched one + ollama.chat = patched_function + + def 
_override_chat_client(self): + from ollama import Client + + original_func["ollama.Client.chat"] = Client.chat + + def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + result = original_func["ollama.Client.chat"](*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp) + + # Override the original method with the patched one + Client.chat = patched_function + + def _override_chat_async_client(self): + from ollama import AsyncClient + + original_func["ollama.AsyncClient.chat"] = AsyncClient.chat + + async def patched_function(*args, **kwargs): + # Call the original function with its original arguments + init_timestamp = get_ISO_time() + result = await original_func["ollama.AsyncClient.chat"](*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp) + + # Override the original method with the patched one + AsyncClient.chat = patched_function diff --git a/agentops/llms/openai.py b/agentops/llms/openai.py new file mode 100644 index 00000000..fc16ab01 --- /dev/null +++ b/agentops/llms/openai.py @@ -0,0 +1,257 @@ +import inspect +import pprint +from typing import Optional + +from agentops.llms.instrumented_provider import InstrumentedProvider +from agentops.time_travel import fetch_completion_override_from_time_travel_cache + +from ..event import ActionEvent, ErrorEvent, LLMEvent +from ..session import Session +from ..log_config import logger +from ..helpers import check_call_stack_for_agent_id, get_ISO_time +from ..singleton import singleton + + +@singleton +class OpenAiProvider(InstrumentedProvider): + + original_create = None + original_create_async = None + + def __init__(self, client): + super().__init__(client) + self._provider_name = "OpenAI" + + def handle_response( + self, response, kwargs, init_timestamp, session: Optional[Session] = None + ) -> dict: + """Handle responses for OpenAI versions >v1.0.0""" + from openai import AsyncStream, Stream + from openai.resources import AsyncCompletions + from openai.types.chat import ChatCompletionChunk + + self.llm_event = LLMEvent(init_timestamp=init_timestamp, params=kwargs) + if session is not None: + self.llm_event.session_id = session.session_id + + def handle_stream_chunk(chunk: ChatCompletionChunk): + # NOTE: prompt/completion usage not returned in response when streaming + # We take the first ChatCompletionChunk and accumulate the deltas from all subsequent chunks to build one full chat completion + if self.llm_event.returns == None: + self.llm_event.returns = chunk + + try: + accumulated_delta = self.llm_event.returns.choices[0].delta + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.model = chunk.model + self.llm_event.prompt = kwargs["messages"] + + # NOTE: We assume for completion only choices[0] is relevant + choice = chunk.choices[0] + + if choice.delta.content: + accumulated_delta.content += choice.delta.content + + if choice.delta.role: + accumulated_delta.role = choice.delta.role + + if choice.delta.tool_calls: + accumulated_delta.tool_calls = choice.delta.tool_calls + + if choice.delta.function_call: + accumulated_delta.function_call = choice.delta.function_call + + if choice.finish_reason: + # Streaming is done. 
Record LLMEvent + self.llm_event.returns.choices[0].finish_reason = ( + choice.finish_reason + ) + self.llm_event.completion = { + "role": accumulated_delta.role, + "content": accumulated_delta.content, + "function_call": accumulated_delta.function_call, + "tool_calls": accumulated_delta.tool_calls, + } + self.llm_event.end_timestamp = get_ISO_time() + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + chunk = pprint.pformat(chunk) + logger.warning( + f"Unable to parse a chunk for LLM call. Skipping upload to AgentOps\n" + f"chunk:\n {chunk}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + # if the response is a generator, decorate the generator + if isinstance(response, Stream): + + def generator(): + for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return generator() + + # For asynchronous AsyncStream + elif isinstance(response, AsyncStream): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # For async AsyncCompletion + elif isinstance(response, AsyncCompletions): + + async def async_generator(): + async for chunk in response: + handle_stream_chunk(chunk) + yield chunk + + return async_generator() + + # v1.0.0+ responses are objects + try: + self.llm_event.returns = response + self.llm_event.agent_id = check_call_stack_for_agent_id() + self.llm_event.prompt = kwargs["messages"] + self.llm_event.prompt_tokens = response.usage.prompt_tokens + self.llm_event.completion = response.choices[0].message.model_dump() + self.llm_event.completion_tokens = response.usage.completion_tokens + self.llm_event.model = response.model + + self._safe_record(session, self.llm_event) + except Exception as e: + self._safe_record( + session, ErrorEvent(trigger_event=self.llm_event, exception=e) + ) + + kwargs_str = pprint.pformat(kwargs) + response = pprint.pformat(response) + logger.warning( + f"Unable to parse response for LLM call. 
Skipping upload to AgentOps\n" + f"response:\n {response}\n" + f"kwargs:\n {kwargs_str}\n" + ) + + return response + + def override(self): + self._override_openai_v1_completion() + self._override_openai_v1_async_completion() + + def _override_openai_v1_completion(self): + from openai.resources.chat import completions + from openai.types.chat import ChatCompletion, ChatCompletionChunk + + # Store the original method + self.original_create = completions.Completions.create + + def patched_function(*args, **kwargs): + init_timestamp = get_ISO_time() + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + completion_override = fetch_completion_override_from_time_travel_cache( + kwargs + ) + if completion_override: + result_model = None + pydantic_models = (ChatCompletion, ChatCompletionChunk) + for pydantic_model in pydantic_models: + try: + result_model = pydantic_model.model_validate_json( + completion_override + ) + break + except Exception as e: + pass + + if result_model is None: + logger.error( + f"Time Travel: Pydantic validation failed for {pydantic_models} \n" + f"Time Travel: Completion override was:\n" + f"{pprint.pformat(completion_override)}" + ) + return None + return self.handle_response( + result_model, kwargs, init_timestamp, session=session + ) + + # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) + # if prompt_override: + # kwargs["messages"] = prompt_override["messages"] + + # Call the original function with its original arguments + result = self.original_create(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + completions.Completions.create = patched_function + + def _override_openai_v1_async_completion(self): + from openai.resources.chat import completions + from openai.types.chat import ChatCompletion, ChatCompletionChunk + + # Store the original method + self.original_create_async = completions.AsyncCompletions.create + + async def patched_function(*args, **kwargs): + + init_timestamp = get_ISO_time() + + session = kwargs.get("session", None) + if "session" in kwargs.keys(): + del kwargs["session"] + + completion_override = fetch_completion_override_from_time_travel_cache( + kwargs + ) + if completion_override: + result_model = None + pydantic_models = (ChatCompletion, ChatCompletionChunk) + for pydantic_model in pydantic_models: + try: + result_model = pydantic_model.model_validate_json( + completion_override + ) + break + except Exception as e: + pass + + if result_model is None: + logger.error( + f"Time Travel: Pydantic validation failed for {pydantic_models} \n" + f"Time Travel: Completion override was:\n" + f"{pprint.pformat(completion_override)}" + ) + return None + return self.handle_response( + result_model, kwargs, init_timestamp, session=session + ) + + # prompt_override = fetch_prompt_override_from_time_travel_cache(kwargs) + # if prompt_override: + # kwargs["messages"] = prompt_override["messages"] + + # Call the original function with its original arguments + result = await original_create_async(*args, **kwargs) + return self.handle_response(result, kwargs, init_timestamp, session=session) + + # Override the original method with the patched one + completions.AsyncCompletions.create = patched_function + + def undo_override(self): + from openai.resources.chat import completions + + completions.AsyncCompletions.create = self.original_create_async + completions.Completions.create = 
self.original_create diff --git a/agentops/singleton.py b/agentops/singleton.py new file mode 100644 index 00000000..8200510e --- /dev/null +++ b/agentops/singleton.py @@ -0,0 +1,30 @@ +ao_instances = {} + + +def singleton(class_): + + def getinstance(*args, **kwargs): + if class_ not in ao_instances: + ao_instances[class_] = class_(*args, **kwargs) + return ao_instances[class_] + + return getinstance + + +def conditional_singleton(class_): + + def getinstance(*args, **kwargs): + use_singleton = kwargs.pop("use_singleton", True) + if use_singleton: + if class_ not in ao_instances: + ao_instances[class_] = class_(*args, **kwargs) + return ao_instances[class_] + else: + return class_(*args, **kwargs) + + return getinstance + + +def clear_singletons(): + global ao_instances + ao_instances = {} diff --git a/agentops/time_travel.py b/agentops/time_travel.py index caf7887c..14e8b2af 100644 --- a/agentops/time_travel.py +++ b/agentops/time_travel.py @@ -3,7 +3,7 @@ import os from .http_client import HttpClient from .exceptions import ApiServerException -from .helpers import singleton +from .singleton import singleton @singleton diff --git a/docs/v1/usage/tracking-llm-calls.mdx b/docs/v1/usage/tracking-llm-calls.mdx index d51137ac..9cb93ab8 100644 --- a/docs/v1/usage/tracking-llm-calls.mdx +++ b/docs/v1/usage/tracking-llm-calls.mdx @@ -5,8 +5,8 @@ description: "Tracking LLM Calls using the AgentOps SDK" ### How it works -When the AgentOps SDK detects the `openai`, `litellm`, or `cohere` as installed modules, it will automatically -start tracking their usage. No further work is required from you! ๐Ÿ˜Š +When the AgentOps SDK detects a supported LLM provider module installed, it will automatically +start tracking its usage. No further work is required from you! ๐Ÿ˜Š ### Not working? @@ -28,6 +28,14 @@ To get started, just follow the quick start guide. +### Stop Tracking LLM Calls + +To stop tracking LLM calls after running `agentops.init()`, you can call `agentops.stop_instrumenting()`. + +This function reverts the changes made to your LLM Provider's module, removing AgentOps instrumentation. + +_Special consideration for Cohere: Calling `stop_instrumenting()` has no effect on previously instantiated Cohere clients. 
You must create a new Cohere client after calling this function._ + diff --git a/examples/README.md b/examples/README.md index 62c35dd3..7bc75c15 100644 --- a/examples/README.md +++ b/examples/README.md @@ -8,4 +8,6 @@ ## Integrations - [Using Langchain](./langchain_examples.ipynb) - [Crew.ai](https://github.com/joaomdmoura/crewAI-examples/tree/main/markdown_validator) - - Crew is a framework for developing agents, a number of their example projects use AgentOps \ No newline at end of file + - Crew is a framework for developing agents, a number of their example projects use AgentOps +- [Cohere](./cohere_example.ipynb) +- [Anthropic](./anthropic_example.ipynb) \ No newline at end of file diff --git a/examples/anthropic/anthropic_example.ipynb b/examples/anthropic/anthropic_example.ipynb new file mode 100644 index 00000000..3f287621 --- /dev/null +++ b/examples/anthropic/anthropic_example.ipynb @@ -0,0 +1,235 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Anthropic Example" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "First let's install the required packages" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%pip install -U anthropic\n", + "%pip install -U agentops" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Then import them" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "from anthropic import Anthropic, AsyncAnthropic\n", + "import agentops\n", + "import os\n", + "from dotenv import load_dotenv" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Next, we'll grab our API keys. You can use dotenv like below or however else you like to load environment variables" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "load_dotenv()\n", + "ANTHROPIC_API_KEY = os.getenv(\"ANTHROPIC_API_KEY\") or \"\"\n", + "AGENTOPS_API_KEY = os.getenv(\"AGENTOPS_API_KEY\") or \"\"" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.init(AGENTOPS_API_KEY, default_tags=[\"anthropic-example\"])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Sync Example\n", + "We will demonstrate a basic sync call to Anthropic using the Claude 3.5 Sonnet model." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "client = Anthropic()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stream = client.messages.create(\n", + " max_tokens=1024,\n", + " model=\"claude-3-5-sonnet-20240620\",\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"What is 2+2?\",\n", + " }\n", + " ],\n", + " stream=True,\n", + ")\n", + "\n", + "response = \"\"\n", + "for event in stream:\n", + " if event.type == \"content_block_delta\":\n", + " response += event.delta.text\n", + " elif event.type == \"message_stop\":\n", + " print(response)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Async Example\n", + "The async example is very similar to the sync example, but it uses the AsyncAnthropic client from the anthropic library." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "aclient = AsyncAnthropic()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "async def main() -> None:\n", + " message = await aclient.messages.create(\n", + " max_tokens=1024,\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Explain everything you know about the Leibniz Equation.\",\n", + " }\n", + " ],\n", + " model=\"claude-3-5-sonnet-20240620\",\n", + " )\n", + " print(message.content[0].text)\n", + "\n", + "\n", + "await main()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Tool Calling Example\n", + "Anthropic models support tool calling, which allows the model to call external APIs." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "stream = client.messages.create(\n", + " max_tokens=1024,\n", + " model=\"claude-3-5-sonnet-20240620\",\n", + " tools=[\n", + " {\n", + " \"name\": \"web_search\",\n", + " \"description\": \"Search the web for information\",\n", + " \"input_schema\": {\n", + " \"type\": \"object\",\n", + " \"properties\": {\n", + " \"query\": {\"type\": \"string\", \"description\": \"The search query\"}\n", + " },\n", + " \"required\": [\"query\"],\n", + " },\n", + " }\n", + " ],\n", + " messages=[\n", + " {\n", + " \"role\": \"user\",\n", + " \"content\": \"Tell me everything you can about AgentOps\",\n", + " }\n", + " ],\n", + " stream=True,\n", + ")\n", + "\n", + "response = \"\"\n", + "for event in stream:\n", + " if event.type == \"content_block_delta\":\n", + " if event.delta.type == \"text\":\n", + " response += event.delta.text\n", + " elif event.type == \"message_stop\":\n", + " print(response)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "agentops.end_session(\"Success\")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "ops", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.9.19" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/tests/core_manual_tests/canary.py b/tests/core_manual_tests/canary.py index 325cb840..72e2e357 100644 --- a/tests/core_manual_tests/canary.py +++ b/tests/core_manual_tests/canary.py @@ -4,7 +4,7 @@ from agentops import ActionEvent load_dotenv() -agentops.init() +agentops.init(default_tags=["canary"]) openai = OpenAI() messages = [{"role": "user", "content": "Hello"}] diff --git a/tests/core_manual_tests/providers/anthropic_canary.py b/tests/core_manual_tests/providers/anthropic_canary.py new file mode 100644 index 00000000..bc505606 --- /dev/null +++ b/tests/core_manual_tests/providers/anthropic_canary.py @@ -0,0 +1,78 @@ +import asyncio + +import agentops +from dotenv import load_dotenv +import anthropic + +load_dotenv() +agentops.init(default_tags=["anthropic-provider-test"]) +anthropic_client = anthropic.Anthropic() +async_anthropic_client = anthropic.AsyncAnthropic() + +response = anthropic_client.messages.create( + max_tokens=1024, + model="claude-3-5-sonnet-20240620", + messages=[ + { + "role": "user", + "content": "say hi", + } + ], +) + + +stream_response = 
anthropic_client.messages.create( + max_tokens=1024, + model="claude-3-5-sonnet-20240620", + messages=[ + { + "role": "user", + "content": "asy hi 2", + } + ], + stream=True, +) + +response = "" +for event in stream_response: + if event.type == "content_block_delta": + response += event.delta.text + elif event.type == "message_stop": + print(response) + + +async def async_test(): + async_response = await async_anthropic_client.messages.create( + max_tokens=1024, + model="claude-3-5-sonnet-20240620", + messages=[ + { + "role": "user", + "content": "say hi 3", + } + ], + ) + print(async_response) + + +asyncio.run(async_test()) + +agentops.stop_instrumenting() + +untracked_response = anthropic_client.messages.create( + max_tokens=1024, + model="claude-3-5-sonnet-20240620", + messages=[ + { + "role": "user", + "content": "say hi 4", + } + ], +) + + +agentops.end_session(end_state="Success") + +### +# Used to verify that one session is created with one LLM event +### diff --git a/tests/core_manual_tests/providers/cohere_canary.py b/tests/core_manual_tests/providers/cohere_canary.py new file mode 100644 index 00000000..fcec2006 --- /dev/null +++ b/tests/core_manual_tests/providers/cohere_canary.py @@ -0,0 +1,45 @@ +import asyncio + +import agentops +from dotenv import load_dotenv +import cohere + +load_dotenv() +agentops.init(default_tags=["cohere-provider-test"]) +co = cohere.Client() +aco = cohere.AsyncClient() + +chat = co.chat(message="say hi 1") + +stream = co.chat_stream(message="say hi 2") +response = "" +for event in stream: + if event.event_type == "text-generation": + response += event.text + print(event.text, end="") + elif event.event_type == "stream-end": + print("\n") + print(event) + print("\n") + + +async def main(): + async_response = await aco.chat(message="say hi 3") + print(async_response) + + +asyncio.run(main()) + +agentops.stop_instrumenting() + +# for cohere, because the client is not a singleton, calling `stop_instrumenting()` only affects +# new clients created, not existing clients. 
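+# note: as stated above, the existing `co`/`aco` clients are unaffected by
+# stop_instrumenting() and are expected to remain tracked; only clients created
+# from this point on, like `co_untracked` below, should be untracked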
+ +co_untracked = cohere.Client() +chat_untracked = co_untracked.chat(message="say hi untracked") + +agentops.end_session(end_state="Success") + +### +# Used to verify that one session is created with one LLM event +### diff --git a/tests/core_manual_tests/providers/groq_canary.py b/tests/core_manual_tests/providers/groq_canary.py new file mode 100644 index 00000000..1d91f33c --- /dev/null +++ b/tests/core_manual_tests/providers/groq_canary.py @@ -0,0 +1,60 @@ +import asyncio + +import agentops +from dotenv import load_dotenv +import os +from groq import Groq, AsyncGroq + +load_dotenv() +agentops.init(default_tags=["groq-provider-test"]) +groq_client = Groq(api_key=os.getenv("GROQ_API_KEY")) +async_groq_client = AsyncGroq(api_key=os.getenv("GROQ_API_KEY")) + +messages = [{"role": "user", "content": "Hello"}] + +# option 1: use session.patch +res = groq_client.chat.completions.create( + model="llama3-70b-8192", + messages=[ + {"role": "user", "content": "Say hello"}, + ], +) + +stream_res = groq_client.chat.completions.create( + model="llama3-70b-8192", + messages=[ + {"role": "user", "content": "Say hello"}, + ], + stream=True, +) + +for chunk in stream_res: + print(chunk) + + +async def async_test(): + async_res = await async_groq_client.chat.completions.create( + model="llama3-70b-8192", + messages=[ + {"role": "user", "content": "Say hello"}, + ], + ) + print(async_res) + + +asyncio.run(async_test()) + +agentops.stop_instrumenting() + +untracked_res = groq_client.chat.completions.create( + model="llama3-70b-8192", + messages=[ + {"role": "user", "content": "Say hello"}, + ], +) + +agentops.end_session(end_state="Success") + +### +# Used to verify that one session is created with one LLM event +### diff --git a/tests/core_manual_tests/providers/litellm_canary.py b/tests/core_manual_tests/providers/litellm_canary.py new file mode 100644 index 00000000..ecd2186a --- /dev/null +++ b/tests/core_manual_tests/providers/litellm_canary.py @@ -0,0 +1,44 @@ +import asyncio + +import agentops +from dotenv import load_dotenv +import litellm + +load_dotenv() +agentops.init(default_tags=["litellm-provider-test"]) + +response = litellm.completion( + model="gpt-3.5-turbo", messages=[{"content": "Hello, how are you?", "role": "user"}] +) + +stream_response = litellm.completion( + model="gpt-3.5-turbo", + messages=[{"content": "Hello, how are you?", "role": "user"}], + stream=True, +) +print(stream_response) +for chunk in stream_response: + print(chunk) + + +async def main(): + async_response = await litellm.acompletion( + model="gpt-3.5-turbo", + messages=[{"content": "Hello, how are you?", "role": "user"}], + ) + print(async_response) + + +asyncio.run(main()) + +agentops.stop_instrumenting() + +untracked_response = litellm.completion( + model="gpt-3.5-turbo", messages=[{"content": "Hello, how are you?", "role": "user"}] +) + +agentops.end_session(end_state="Success") + +### +# Used to verify that one session is created with one LLM event +### diff --git a/tests/core_manual_tests/providers/ollama_canary.py b/tests/core_manual_tests/providers/ollama_canary.py new file mode 100644 index 00000000..667a1f33 --- /dev/null +++ b/tests/core_manual_tests/providers/ollama_canary.py @@ -0,0 +1,58 @@ +import asyncio + +import agentops +from dotenv import load_dotenv +import ollama +from ollama import AsyncClient + +load_dotenv() +agentops.init(default_tags=["ollama-provider-test"]) + +response = ollama.chat( + model="llama3.1", + messages=[ + { + "role": "user", + "content": "say hello sync", + }, + ], +) + 
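+# streaming variant: the Ollama provider accumulates the streamed chunks and
+# records a single LLMEvent once the final chunk (done=True) arrives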
+stream_response = ollama.chat( + model="llama3.1", + messages=[ + { + "role": "user", + "content": "say hello str", + }, + ], + stream=True, +) +for chunk in stream_response: + print(chunk) + + +async def main(): + message = {"role": "user", "content": "say hello mr. async"} + async_response = await AsyncClient().chat(model="llama3.1", messages=[message]) + + +asyncio.run(main()) + +agentops.stop_instrumenting() + +untracked_response = ollama.chat( + model="llama3.1", + messages=[ + { + "role": "user", + "content": "say hello", + }, + ], +) + +agentops.end_session(end_state="Success") + +### +# Used to verify that one session is created with one LLM event +### diff --git a/tests/core_manual_tests/providers/openai_canary.py b/tests/core_manual_tests/providers/openai_canary.py new file mode 100644 index 00000000..3244109d --- /dev/null +++ b/tests/core_manual_tests/providers/openai_canary.py @@ -0,0 +1,48 @@ +import agentops +import asyncio +from openai import OpenAI, AsyncOpenAI +from dotenv import load_dotenv +from agentops import ActionEvent + +load_dotenv() +agentops.init(default_tags=["openai-v1-provider-test"]) +openai = OpenAI() +async_openai = OpenAI() + + +# option 1: use session.patch +response = openai.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello"}], + temperature=0.5, +) + +stream_response = openai.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello streamed"}], + temperature=0.5, + stream=True, +) + +for chunk in stream_response: + print(chunk) + +async_response = async_openai.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello async"}], + temperature=0.5, +) + +agentops.stop_instrumenting() + +not_tracked_response = openai.chat.completions.create( + model="gpt-3.5-turbo", + messages=[{"role": "user", "content": "Hello untracked"}], + temperature=0.5, +) + +agentops.end_session(end_state="Success") + +### +# Used to verify that one session is created with two LLM events +### diff --git a/tests/test_agent.py b/tests/test_agent.py index abf7b12a..408161b9 100644 --- a/tests/test_agent.py +++ b/tests/test_agent.py @@ -1,6 +1,4 @@ from unittest import TestCase -from unittest.mock import patch, MagicMock -from uuid import uuid4 from agentops import track_agent import agentops diff --git a/tests/test_canary.py b/tests/test_canary.py index a89cf344..8ff49a01 100644 --- a/tests/test_canary.py +++ b/tests/test_canary.py @@ -3,7 +3,7 @@ import time import agentops from agentops import ActionEvent -from agentops.helpers import clear_singletons +from agentops.singleton import clear_singletons @pytest.fixture(autouse=True) diff --git a/tests/test_events.py b/tests/test_events.py index 5234f3fa..a2d70d75 100644 --- a/tests/test_events.py +++ b/tests/test_events.py @@ -3,7 +3,7 @@ import pytest import agentops from agentops import ActionEvent, ErrorEvent -from agentops.helpers import clear_singletons +from agentops.singleton import clear_singletons @pytest.fixture(autouse=True) diff --git a/tests/test_record_action.py b/tests/test_record_action.py index d3258583..bc433c55 100644 --- a/tests/test_record_action.py +++ b/tests/test_record_action.py @@ -4,7 +4,7 @@ import agentops from agentops import record_action from datetime import datetime -from agentops.helpers import clear_singletons +from agentops.singleton import clear_singletons import contextlib jwts = ["some_jwt", "some_jwt2", "some_jwt3"] diff --git a/tests/test_record_tool.py 
b/tests/test_record_tool.py index c9f44471..b290e131 100644 --- a/tests/test_record_tool.py +++ b/tests/test_record_tool.py @@ -5,7 +5,7 @@ from agentops import record_tool from datetime import datetime -from agentops.helpers import clear_singletons +from agentops.singleton import clear_singletons import contextlib jwts = ["some_jwt", "some_jwt2", "some_jwt3"] diff --git a/tests/test_session.py b/tests/test_session.py index f5462085..075aca84 100644 --- a/tests/test_session.py +++ b/tests/test_session.py @@ -3,8 +3,7 @@ import time import agentops from agentops import ActionEvent, Client -from agentops.exceptions import NoSessionException, MultiSessionException -from agentops.helpers import clear_singletons +from agentops.singleton import clear_singletons @pytest.fixture(autouse=True) diff --git a/tests/test_singleton.py b/tests/test_singleton.py index ff936c30..ca479090 100644 --- a/tests/test_singleton.py +++ b/tests/test_singleton.py @@ -1,8 +1,6 @@ import uuid -import pytest - -from agentops.helpers import singleton, conditional_singleton, clear_singletons +from agentops.singleton import singleton, conditional_singleton, clear_singletons @singleton