diff --git a/ee/support_sidebar_max/views.py b/ee/support_sidebar_max/views.py index 0a69c151bc689..8f3db0689edda 100644 --- a/ee/support_sidebar_max/views.py +++ b/ee/support_sidebar_max/views.py @@ -9,6 +9,7 @@ import time import anthropic import json +from datetime import datetime, UTC from rest_framework.authentication import SessionAuthentication from rest_framework.permissions import IsAuthenticated @@ -20,12 +21,11 @@ # Configure logging django_logger = logging.getLogger("django") -django_logger.setLevel(logging.DEBUG) +django_logger.setLevel(logging.INFO) # Don't add the auth header here, the Anthropic Python SDK handles it REQUIRED_HEADERS = { "anthropic-version": "2023-06-01", - "anthropic-beta": "prompt-caching-2024-07-31", "content-type": "application/json", } @@ -259,15 +259,16 @@ def _handle_tool_use(self, result: dict[str, Any], history: ConversationHistory) def send_message(self, client: anthropic.Anthropic, tools, system_prompt, messages): """Send message to Anthropic API with proper error handling""" try: - django_logger.info("Preparing to send message to Anthropic API") + django_logger.info("✨🦔 Preparing to send message to Anthropic API") try: - headers = {"anthropic-beta": "prompt-caching-2024-07-31"} - django_logger.debug("API headers prepared successfully") + headers = {} + django_logger.debug("✨🦔 API headers prepared successfully") except Exception as e: - django_logger.error(f"Error preparing API headers: {str(e)}", exc_info=True) + django_logger.error(f"✨🦔 Error preparing API headers: {str(e)}", exc_info=True) raise - response = client.messages.create( + # Use with_raw_response to get access to headers + raw_response = client.messages.with_raw_response.create( model="claude-3-5-sonnet-20241022", max_tokens=1024, tools=tools, @@ -276,24 +277,78 @@ def send_message(self, client: anthropic.Anthropic, tools, system_prompt, messag extra_headers=headers, ) - django_logger.debug(f"Response from Anthropic API: {response}") + # Get the actual message response + message = raw_response.parse() + django_logger.debug(f"✨🦔 Response from Anthropic API: {message}") + + # Log rate limit information if available + try: + # Log current capacity (for monitoring/debugging) + django_logger.info( + f"✨🦔 API Capacity - " + f"Requests: {raw_response.headers.get('anthropic-ratelimit-requests-remaining', '?')}/{raw_response.headers.get('anthropic-ratelimit-requests-limit', '?')}, " + f"Input Tokens: {raw_response.headers.get('anthropic-ratelimit-input-tokens-remaining', '?')}/{raw_response.headers.get('anthropic-ratelimit-input-tokens-limit', '?')}, " + f"Output Tokens: {raw_response.headers.get('anthropic-ratelimit-output-tokens-remaining', '?')}/{raw_response.headers.get('anthropic-ratelimit-output-tokens-limit', '?')}" + ) + except Exception as e: + django_logger.warning(f"✨🦔 Unable to log capacity info: {str(e)}") + + # Log token usage and cache metrics + if message.usage: + input_tokens = getattr(message.usage, "input_tokens", 0) + output_tokens = getattr(message.usage, "output_tokens", 0) + cache_created = getattr(message.usage, "cache_creation_input_tokens", 0) + cache_read = getattr(message.usage, "cache_read_input_tokens", 0) + fresh_input = getattr(message.usage, "input_tokens", 0) + + django_logger.info(f"✨🦔 Request Usage - Input: {input_tokens}, Output: {output_tokens} tokens") + if cache_created or cache_read: + django_logger.info( + f"✨🦔 Cache Stats - Created: {cache_created}, Read: {cache_read}, Fresh: {fresh_input}" + ) # Extract the necessary fields from the Message object result = { - "content": [block.dict() for block in response.content] - if isinstance(response.content, list) - else response.content, - "stop_reason": response.stop_reason, - "usage": response.usage.dict() if response.usage else None, + "content": [block.dict() for block in message.content] + if isinstance(message.content, list) + else message.content, + "stop_reason": message.stop_reason, + "usage": message.usage.dict() if message.usage else None, } - django_logger.debug(f"Processed API response: {result}") + django_logger.debug(f"✨🦔 Processed API response: {result}") return result except anthropic.RateLimitError as e: - django_logger.warning(f"Rate limit exceeded: {str(e)}") - retry_after = getattr(e, "retry_after", 30) - return self._handle_rate_limit(retry_after) + try: + # Get reset time from headers if available + headers = e.response.headers if hasattr(e, "response") and hasattr(e.response, "headers") else {} + + # Try to get retry-after header first + if "retry-after" in headers: + retry_seconds = int(headers["retry-after"]) + else: + # Calculate from reset timestamp + now = datetime.now(UTC) + reset_times = [] + + for header in headers: + if header.endswith("-reset"): + try: + reset_time = datetime.fromisoformat(headers[header].rstrip("Zs")).replace(tzinfo=UTC) + wait_seconds = max(0, int((reset_time - now).total_seconds())) + reset_times.append(wait_seconds) + except (ValueError, TypeError): + continue + + retry_seconds = max(reset_times) if reset_times else 15 + + django_logger.warning(f"✨🦔 Rate limit hit - waiting {retry_seconds} seconds before retry") + return self._handle_rate_limit(retry_seconds) + + except Exception as header_error: + django_logger.warning(f"✨🦔 Rate limit handling error: {str(header_error)}") + return self._handle_rate_limit(15) # Default to 15 seconds except Exception as e: - django_logger.error(f"Request to Anthropic API failed: {str(e)}", exc_info=True) + django_logger.error(f"✨🦔 Request to Anthropic API failed: {str(e)}", exc_info=True) return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR) diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAILogic.ts b/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAILogic.ts index 91d69ee329498..bae2a07ad4e01 100644 --- a/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAILogic.ts +++ b/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAILogic.ts @@ -4,6 +4,18 @@ import { loaders } from 'kea-loaders' import type { sidePanelMaxAILogicType } from './sidePanelMaxAILogicType' import { sidePanelMaxAPI } from './sidePanelMaxAPI' +interface RateLimit { + limit: number + remaining: number + reset: string +} + +interface RateLimits { + requests: RateLimit + input_tokens: RateLimit + output_tokens: RateLimit +} + export interface ChatMessage { role: 'user' | 'assistant' content: string @@ -14,7 +26,7 @@ export interface ChatMessage { interface MaxResponse { content: string | { text: string; type: string } - isRateLimited?: boolean + rate_limits?: RateLimits isError?: boolean } @@ -94,20 +106,37 @@ export const sidePanelMaxAILogic = kea([ const response = (await sidePanelMaxAPI.sendMessage(message)) as MaxResponse await breakpoint(100) - const content = typeof response.content === 'string' ? response.content : response.content.text + let messageContent = + typeof response.content === 'string' ? response.content : response.content.text + + // Check rate limits + const { rate_limits } = response + if (rate_limits) { + const isLimited = Object.values(rate_limits).some((limit) => limit.remaining === 0) + if (isLimited) { + actions.setRateLimited(true) + // Find the shortest reset time + const resetTimes = Object.values(rate_limits) + .map((limit) => new Date(limit.reset).getTime()) + .filter((time) => !isNaN(time)) + if (resetTimes.length > 0) { + const earliestReset = Math.min(...resetTimes) + const waitSeconds = Math.max(0, Math.ceil((earliestReset - Date.now()) / 1000)) + messageContent = `🫣 Rate limit hit! Please try again in ${waitSeconds} seconds. 🦔` + } + } + } - if (response.isRateLimited) { - actions.setRateLimited(true) - } else if (response.isError) { + if (response.isError) { actions.setServerError(true) } else { actions.setRateLimited(false) actions.setServerError(false) } - actions.appendAssistantMessage(content) + actions.appendAssistantMessage(messageContent) setTimeout(() => actions.setSearchingThinking(false), 100) - return content + return messageContent } catch (error: unknown) { if ( error && diff --git a/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAPI.ts b/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAPI.ts index e7e2385df0b98..02036d197a0e1 100644 --- a/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAPI.ts +++ b/frontend/src/layout/navigation-3000/sidepanel/panels/sidePanelMaxAPI.ts @@ -1,7 +1,24 @@ import api from 'lib/api' +interface RateLimit { + limit: number + remaining: number + reset: string +} + +interface RateLimits { + requests: RateLimit + input_tokens: RateLimit + output_tokens: RateLimit +} + +interface MaxResponse { + content: string + rate_limits: RateLimits +} + export const sidePanelMaxAPI = { - async sendMessage(message: string): Promise<{ content: string }> { + async sendMessage(message: string): Promise { // Get or create session ID using sessionStorage let sessionId = sessionStorage.getItem('max_session_id') if (!sessionId) { @@ -20,6 +37,9 @@ export const sidePanelMaxAPI = { } const data = await response.json() - return { content: data.content } + return { + content: data.content, + rate_limits: data.rate_limits, + } }, } diff --git a/requirements.in b/requirements.in index e0006601f9469..ba67e04942a84 100644 --- a/requirements.in +++ b/requirements.in @@ -113,5 +113,5 @@ xmlsec==1.3.13 # Do not change this version - it will break SAML lxml==4.9.4 # Do not change this version - it will break SAML grpcio~=1.63.2 # Version constrained so that `deepeval` can be installed in in dev tenacity~=8.4.2 # Version constrained so that `deepeval` can be installed in in dev -anthropic==0.40.0 +anthropic==0.42.0 beautifulsoup4==4.12.3 diff --git a/requirements.txt b/requirements.txt index d38b4b28af2ca..4adcf19049ebb 100644 --- a/requirements.txt +++ b/requirements.txt @@ -25,7 +25,7 @@ amqp==5.1.1 # via kombu annotated-types==0.7.0 # via pydantic -anthropic==0.40.0 +anthropic==0.42.0 # via -r requirements.in antlr4-python3-runtime==4.13.1 # via -r requirements.in