From 998ec9e23a27e9121ef7d84e2e12a19ec6853009 Mon Sep 17 00:00:00 2001 From: Howard Gil Date: Fri, 19 Apr 2024 18:51:56 -0700 Subject: [PATCH] [Feature] Better ErrorEvents (#156) * Setting session to none if server does not return 200 for /sessions * WIP. Not working * Adding ErrorEvent magic, stripping out trigger_event and exception * Modified existing ErrorEvents * Working * Removed hardcoded error * Removed resolved comments --- agentops/client.py | 6 ++---- agentops/event.py | 16 +++++++++++----- agentops/langchain_callback_handler.py | 26 ++++++++++++-------------- agentops/llm_tracker.py | 8 ++++---- 4 files changed, 29 insertions(+), 27 deletions(-) diff --git a/agentops/client.py b/agentops/client.py index a4eaedea..b97d0c55 100644 --- a/agentops/client.py +++ b/agentops/client.py @@ -150,8 +150,7 @@ def _record_event_sync(self, func, event_name, *args, **kwargs): self.record(event) except Exception as e: - # TODO: add the stack trace - self.record(ErrorEvent(trigger_event=event, details={f"{type(e).__name__}": str(e)})) + self.record(ErrorEvent(trigger_event=event, exception=e)) # Re-raise the exception raise @@ -190,8 +189,7 @@ async def _record_event_async(self, func, event_name, *args, **kwargs): self.record(event) except Exception as e: - # TODO: add the stack trace - self.record(ErrorEvent(trigger_event=event, details={f"{type(e).__name__}": str(e)})) + self.record(ErrorEvent(trigger_event=event, exception=e)) # Re-raise the exception raise diff --git a/agentops/event.py b/agentops/event.py index 76deeafc..e0f017db 100644 --- a/agentops/event.py +++ b/agentops/event.py @@ -5,11 +5,12 @@ Event: Represents discrete events to be recorded. """ -from dataclasses import dataclass, field +from dataclasses import asdict, dataclass, field from typing import List, Optional from .helpers import get_ISO_time, check_call_stack_for_agent_id from .enums import EventType, Models from uuid import UUID, uuid4 +import traceback @dataclass @@ -115,6 +116,7 @@ class ErrorEvent(): For recording any errors e.g. ones related to agent execution trigger_event(Event, optional): The event object that triggered the error if applicable. + exception(BaseException, optional): The thrown exception. We will automatically parse the error_type and details from this. error_type(str, optional): The type of error e.g. "ValueError". code(str, optional): A code that can be used to identify the error e.g. 501. details(str, optional): Detailed information about the error. @@ -123,11 +125,12 @@ class ErrorEvent(): """ - trigger_event: Optional[Event] = None # TODO: remove from serialization? + trigger_event: Optional[Event] = None + exception: Optional[BaseException] = None error_type: Optional[str] = None code: Optional[str] = None details: Optional[str] = None - logs: Optional[str] = None + logs: Optional[str] = field(default_factory=traceback.format_exc) timestamp: str = field(default_factory=get_ISO_time) def __post_init__(self): @@ -135,5 +138,8 @@ def __post_init__(self): if self.trigger_event: self.trigger_event_id = self.trigger_event.id self.trigger_event_type = self.trigger_event.event_type - # TODO: remove trigger_event from serialization - # e.g. field(repr=False, compare=False, hash=False, metadata={'serialize': False}) + self.trigger_event = None # removes trigger_event from serialization + if self.exception: + self.error_type = self.error_type or type(self.exception).__name__ + self.details = self.details or str(self.exception) + self.exception = None # removes exception from serialization diff --git a/agentops/langchain_callback_handler.py b/agentops/langchain_callback_handler.py index a79d3ea9..fda660c5 100644 --- a/agentops/langchain_callback_handler.py +++ b/agentops/langchain_callback_handler.py @@ -80,7 +80,7 @@ def on_llm_error( llm_event: LLMEvent = self.events.llm[str(run_id)] self.ao_client.record(llm_event) - error_event = ErrorEvent(trigger_event=llm_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=llm_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -106,8 +106,7 @@ def on_llm_end( if len(response.generations) == 0: # TODO: more descriptive error - error_event = ErrorEvent(trigger_event=self.events.llm[str(run_id)], - details="on_llm_end: No generations", timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=self.events.llm[str(run_id)], error_type="NoGenerations", details="on_llm_end: No generations") self.ao_client.record(error_event) @debug_print_function_params @@ -156,7 +155,7 @@ def on_chain_error( action_event: ActionEvent = self.events.chain[str(run_id)] self.ao_client.record(action_event) - error_event = ErrorEvent(trigger_event=action_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=action_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -199,7 +198,7 @@ def on_tool_end( # Tools are capable of failing `on_tool_end` quietly. # This is a workaround to make sure we can log it as an error. if kwargs.get('name') == '_Exception': - error_event = ErrorEvent(trigger_event=tool_event, details=output, timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=tool_event, error_type="LangchainToolException", details=output) self.ao_client.record(error_event) @debug_print_function_params @@ -214,7 +213,7 @@ def on_tool_error( tool_event: ToolEvent = self.events.tool[str(run_id)] self.ao_client.record(tool_event) - error_event = ErrorEvent(trigger_event=tool_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=tool_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -265,7 +264,7 @@ def on_retriever_error( action_event: ActionEvent = self.events.retriever[str(run_id)] self.ao_client.record(action_event) - error_event = ErrorEvent(trigger_event=action_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=action_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -405,7 +404,7 @@ async def on_llm_error( llm_event: LLMEvent = self.events.llm[str(run_id)] self.ao_client.record(llm_event) - error_event = ErrorEvent(trigger_event=llm_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=llm_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -431,8 +430,7 @@ async def on_llm_end( if len(response.generations) == 0: # TODO: more descriptive error - error_event = ErrorEvent(trigger_event=self.events.llm[str(run_id)], - details="on_llm_end: No generations", timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=self.events.llm[str(run_id)], error_type="NoGenerations", details="on_llm_end: No generations") self.ao_client.record(error_event) @debug_print_function_params @@ -481,7 +479,7 @@ async def on_chain_error( action_event: ActionEvent = self.events.chain[str(run_id)] self.ao_client.record(action_event) - error_event = ErrorEvent(trigger_event=action_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=action_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -524,7 +522,7 @@ async def on_tool_end( # Tools are capable of failing `on_tool_end` quietly. # This is a workaround to make sure we can log it as an error. if kwargs.get('name') == '_Exception': - error_event = ErrorEvent(trigger_event=tool_event, details=output, timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=tool_event, error_type="LangchainToolException", details=output) self.ao_client.record(error_event) @debug_print_function_params @@ -539,7 +537,7 @@ async def on_tool_error( tool_event: ToolEvent = self.events.tool[str(run_id)] self.ao_client.record(tool_event) - error_event = ErrorEvent(trigger_event=tool_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=tool_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params @@ -590,7 +588,7 @@ async def on_retriever_error( action_event: ActionEvent = self.events.retriever[str(run_id)] self.ao_client.record(action_event) - error_event = ErrorEvent(trigger_event=action_event, details=str(error), timestamp=get_ISO_time()) + error_event = ErrorEvent(trigger_event=action_event, exception=error) self.ao_client.record(error_event) @debug_print_function_params diff --git a/agentops/llm_tracker.py b/agentops/llm_tracker.py index 1aefc392..6bae7446 100644 --- a/agentops/llm_tracker.py +++ b/agentops/llm_tracker.py @@ -58,7 +58,7 @@ def handle_stream_chunk(chunk): self.client.record(self.llm_event) except Exception as e: - self.client.record(ErrorEvent(trigger_event=self.llm_event, details={f"{type(e).__name__}": str(e)})) + self.client.record(ErrorEvent(trigger_event=self.llm_event, exception=e)) # TODO: This error is specific to only one path of failure. Should be more generic or have different logging for different paths logging.warning( f"🖇 AgentOps: Unable to parse a chunk for LLM call {kwargs} - skipping upload to AgentOps") @@ -97,7 +97,7 @@ def generator(): self.client.record(self.llm_event) except Exception as e: - self.client.record(ErrorEvent(trigger_event=self.llm_event, details={f"{type(e).__name__}": str(e)})) + self.client.record(ErrorEvent(trigger_event=self.llm_event, exception=e)) # TODO: This error is specific to only one path of failure. Should be more generic or have different logging for different paths logging.warning( f"🖇 AgentOps: Unable to parse a chunk for LLM call {kwargs} - skipping upload to AgentOps") @@ -143,7 +143,7 @@ def handle_stream_chunk(chunk: ChatCompletionChunk): self.client.record(self.llm_event) except Exception as e: - self.client.record(ErrorEvent(trigger_event=self.llm_event, details={f"{type(e).__name__}": str(e)})) + self.client.record(ErrorEvent(trigger_event=self.llm_event, exception=e)) # TODO: This error is specific to only one path of failure. Should be more generic or have different logging for different paths logging.warning( f"🖇 AgentOps: Unable to parse a chunk for LLM call {kwargs} - skipping upload to AgentOps") @@ -188,7 +188,7 @@ async def async_generator(): self.client.record(self.llm_event) except Exception as e: - self.client.record(ErrorEvent(trigger_event=self.llm_event, details={f"{type(e).__name__}": str(e)})) + self.client.record(ErrorEvent(trigger_event=self.llm_event, exception=e)) # TODO: This error is specific to only one path of failure. Should be more generic or have different logging for different paths logging.warning( f"🖇 AgentOps: Unable to parse a chunk for LLM call {kwargs} - skipping upload to AgentOps")