diff --git a/agentops/llms/litellm.py b/agentops/llms/litellm.py
--- a/agentops/llms/litellm.py
+++ b/agentops/llms/litellm.py
@@ -96,6 +96,20 @@ def generator():
 
             return generator()
 
+        # litellm uses a CustomStreamWrapper. Import it lazily, at the point of
+        # use, so that importing this module does not itself require litellm
+        # to be importable.
+        from litellm.utils import CustomStreamWrapper
+
+        if isinstance(response, CustomStreamWrapper):
+
+            def generator():
+                for chunk in response:
+                    handle_stream_chunk(chunk)
+                    yield chunk
+
+            return generator()
+
         # For asynchronous AsyncStream
         elif isinstance(response, AsyncStream):
 
diff --git a/tests/core_manual_tests/providers/litellm_canary.py b/tests/core_manual_tests/providers/litellm_canary.py
index b97ce646..84ebd63a 100644
--- a/tests/core_manual_tests/providers/litellm_canary.py
+++ b/tests/core_manual_tests/providers/litellm_canary.py
@@ -1,3 +1,5 @@
+import asyncio
+
 import agentops
 from dotenv import load_dotenv
 import litellm
@@ -9,6 +11,26 @@
     model="gpt-3.5-turbo", messages=[{"content": "Hello, how are you?", "role": "user"}]
 )
 
+stream_response = litellm.completion(
+    model="gpt-3.5-turbo",
+    messages=[{"content": "Hello, how are you?", "role": "user"}],
+    stream=True,
+)
+print(stream_response)
+for chunk in stream_response:
+    print(chunk)
+
+
+async def main():
+    async_response = await litellm.acompletion(
+        model="gpt-3.5-turbo",
+        messages=[{"content": "Hello, how are you?", "role": "user"}],
+    )
+    print(async_response)
+
+
+asyncio.run(main())
+
 agentops.end_session(end_state="Success")
 
 ###