Skip to content

Commit

Permalink
upgrading to Anthropic Python SDK v 0.42.0, corrected some logging, dialed back logging verbosity.
Browse files Browse the repository at this point in the history
  • Loading branch information
slshults committed Dec 23, 2024
1 parent e489b1e commit ab99ce4
Show file tree
Hide file tree
Showing 5 changed files with 133 additions and 29 deletions.
91 changes: 73 additions & 18 deletions ee/support_sidebar_max/views.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,6 +9,7 @@
import time
import anthropic
import json
from datetime import datetime, UTC

from rest_framework.authentication import SessionAuthentication
from rest_framework.permissions import IsAuthenticated
Expand All @@ -20,12 +21,11 @@

# Configure logging
django_logger = logging.getLogger("django")
django_logger.setLevel(logging.DEBUG)
django_logger.setLevel(logging.INFO)

# Don't add the auth header here, the Anthropic Python SDK handles it
REQUIRED_HEADERS = {
"anthropic-version": "2023-06-01",
"anthropic-beta": "prompt-caching-2024-07-31",
"content-type": "application/json",
}

Expand Down Expand Up @@ -259,15 +259,16 @@ def _handle_tool_use(self, result: dict[str, Any], history: ConversationHistory)
def send_message(self, client: anthropic.Anthropic, tools, system_prompt, messages):
"""Send message to Anthropic API with proper error handling"""
try:
django_logger.info("Preparing to send message to Anthropic API")
django_logger.info("✨🦔 Preparing to send message to Anthropic API")
try:
headers = {"anthropic-beta": "prompt-caching-2024-07-31"}
django_logger.debug("API headers prepared successfully")
headers = {}
django_logger.debug("✨🦔 API headers prepared successfully")
except Exception as e:
django_logger.error(f"Error preparing API headers: {str(e)}", exc_info=True)
django_logger.error(f"✨🦔 Error preparing API headers: {str(e)}", exc_info=True)
raise

response = client.messages.create(
# Use with_raw_response to get access to headers
raw_response = client.messages.with_raw_response.create(
model="claude-3-5-sonnet-20241022",
max_tokens=1024,
tools=tools,
Expand All @@ -276,24 +277,78 @@ def send_message(self, client: anthropic.Anthropic, tools, system_prompt, messag
extra_headers=headers,
)

django_logger.debug(f"Response from Anthropic API: {response}")
# Get the actual message response
message = raw_response.parse()
django_logger.debug(f"✨🦔 Response from Anthropic API: {message}")

# Log rate limit information if available
try:
# Log current capacity (for monitoring/debugging)
django_logger.info(
f"✨🦔 API Capacity - "
f"Requests: {raw_response.headers.get('anthropic-ratelimit-requests-remaining', '?')}/{raw_response.headers.get('anthropic-ratelimit-requests-limit', '?')}, "
f"Input Tokens: {raw_response.headers.get('anthropic-ratelimit-input-tokens-remaining', '?')}/{raw_response.headers.get('anthropic-ratelimit-input-tokens-limit', '?')}, "
f"Output Tokens: {raw_response.headers.get('anthropic-ratelimit-output-tokens-remaining', '?')}/{raw_response.headers.get('anthropic-ratelimit-output-tokens-limit', '?')}"
)
except Exception as e:
django_logger.warning(f"✨🦔 Unable to log capacity info: {str(e)}")

# Log token usage and cache metrics
if message.usage:
input_tokens = getattr(message.usage, "input_tokens", 0)
output_tokens = getattr(message.usage, "output_tokens", 0)
cache_created = getattr(message.usage, "cache_creation_input_tokens", 0)
cache_read = getattr(message.usage, "cache_read_input_tokens", 0)
fresh_input = getattr(message.usage, "input_tokens", 0)

django_logger.info(f"✨🦔 Request Usage - Input: {input_tokens}, Output: {output_tokens} tokens")
if cache_created or cache_read:
django_logger.info(
f"✨🦔 Cache Stats - Created: {cache_created}, Read: {cache_read}, Fresh: {fresh_input}"
)

# Extract the necessary fields from the Message object
result = {
"content": [block.dict() for block in response.content]
if isinstance(response.content, list)
else response.content,
"stop_reason": response.stop_reason,
"usage": response.usage.dict() if response.usage else None,
"content": [block.dict() for block in message.content]
if isinstance(message.content, list)
else message.content,
"stop_reason": message.stop_reason,
"usage": message.usage.dict() if message.usage else None,
}

django_logger.debug(f"Processed API response: {result}")
django_logger.debug(f"✨🦔 Processed API response: {result}")
return result

except anthropic.RateLimitError as e:
django_logger.warning(f"Rate limit exceeded: {str(e)}")
retry_after = getattr(e, "retry_after", 30)
return self._handle_rate_limit(retry_after)
try:
# Get reset time from headers if available
headers = e.response.headers if hasattr(e, "response") and hasattr(e.response, "headers") else {}

# Try to get retry-after header first
if "retry-after" in headers:
retry_seconds = int(headers["retry-after"])
else:
# Calculate from reset timestamp
now = datetime.now(UTC)
reset_times = []

for header in headers:
if header.endswith("-reset"):
try:
reset_time = datetime.fromisoformat(headers[header].rstrip("Zs")).replace(tzinfo=UTC)
wait_seconds = max(0, int((reset_time - now).total_seconds()))
reset_times.append(wait_seconds)
except (ValueError, TypeError):
continue

retry_seconds = max(reset_times) if reset_times else 15

django_logger.warning(f"✨🦔 Rate limit hit - waiting {retry_seconds} seconds before retry")
return self._handle_rate_limit(retry_seconds)

except Exception as header_error:
django_logger.warning(f"✨🦔 Rate limit handling error: {str(header_error)}")
return self._handle_rate_limit(15) # Default to 15 seconds
except Exception as e:
django_logger.error(f"Request to Anthropic API failed: {str(e)}", exc_info=True)
django_logger.error(f"✨🦔 Request to Anthropic API failed: {str(e)}", exc_info=True)
return Response({"error": str(e)}, status=status.HTTP_500_INTERNAL_SERVER_ERROR)
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,18 @@ import { loaders } from 'kea-loaders'
import type { sidePanelMaxAILogicType } from './sidePanelMaxAILogicType'
import { sidePanelMaxAPI } from './sidePanelMaxAPI'

// Snapshot of a single rate-limit bucket as delivered in the chat response
// payload. Callers in this logic check `remaining === 0` to detect limiting
// and parse `reset` with `new Date(...)` to compute the wait time.
interface RateLimit {
    limit: number // total allowance for this bucket
    remaining: number // allowance left; 0 means the bucket is exhausted
    reset: string // reset timestamp; must be parseable by `new Date` — TODO confirm exact format from backend
}

// All rate-limit buckets reported by the backend: request count plus input
// and output token budgets. NOTE(review): key names appear to mirror the
// `anthropic-ratelimit-{requests,input-tokens,output-tokens}-*` response
// headers — confirm against the server-side serializer.
interface RateLimits {
    requests: RateLimit
    input_tokens: RateLimit
    output_tokens: RateLimit
}

export interface ChatMessage {
role: 'user' | 'assistant'
content: string
Expand All @@ -14,7 +26,7 @@ export interface ChatMessage {

interface MaxResponse {
content: string | { text: string; type: string }
isRateLimited?: boolean
rate_limits?: RateLimits
isError?: boolean
}

Expand Down Expand Up @@ -94,20 +106,37 @@ export const sidePanelMaxAILogic = kea<sidePanelMaxAILogicType>([
const response = (await sidePanelMaxAPI.sendMessage(message)) as MaxResponse
await breakpoint(100)

const content = typeof response.content === 'string' ? response.content : response.content.text
let messageContent =
typeof response.content === 'string' ? response.content : response.content.text

// Check rate limits
const { rate_limits } = response
if (rate_limits) {
const isLimited = Object.values(rate_limits).some((limit) => limit.remaining === 0)
if (isLimited) {
actions.setRateLimited(true)
// Find the shortest reset time
const resetTimes = Object.values(rate_limits)
.map((limit) => new Date(limit.reset).getTime())
.filter((time) => !isNaN(time))
if (resetTimes.length > 0) {
const earliestReset = Math.min(...resetTimes)
const waitSeconds = Math.max(0, Math.ceil((earliestReset - Date.now()) / 1000))
messageContent = `🫣 Rate limit hit! Please try again in ${waitSeconds} seconds. 🦔`
}
}
}

if (response.isRateLimited) {
actions.setRateLimited(true)
} else if (response.isError) {
if (response.isError) {
actions.setServerError(true)
} else {
actions.setRateLimited(false)
actions.setServerError(false)
}

actions.appendAssistantMessage(content)
actions.appendAssistantMessage(messageContent)
setTimeout(() => actions.setSearchingThinking(false), 100)
return content
return messageContent
} catch (error: unknown) {
if (
error &&
Expand Down
Original file line number Diff line number Diff line change
@@ -1,7 +1,24 @@
import api from 'lib/api'

// One rate-limit bucket as returned by the Max chat endpoint.
interface RateLimit {
    limit: number // total allowance for this bucket
    remaining: number // allowance left in the current window
    reset: string // reset timestamp as a string — presumably ISO 8601; verify against the backend
}

// Grouped rate-limit state: request count plus input/output token buckets.
interface RateLimits {
    requests: RateLimit
    input_tokens: RateLimit
    output_tokens: RateLimit
}

// Shape of the JSON body this client consumes from the Max chat endpoint:
// the assistant's reply text plus the current rate-limit state
// (`data.content` and `data.rate_limits` below).
interface MaxResponse {
    content: string
    rate_limits: RateLimits
}

export const sidePanelMaxAPI = {
async sendMessage(message: string): Promise<{ content: string }> {
async sendMessage(message: string): Promise<MaxResponse> {
// Get or create session ID using sessionStorage
let sessionId = sessionStorage.getItem('max_session_id')
if (!sessionId) {
Expand All @@ -20,6 +37,9 @@ export const sidePanelMaxAPI = {
}

const data = await response.json()
return { content: data.content }
return {
content: data.content,
rate_limits: data.rate_limits,
}
},
}
2 changes: 1 addition & 1 deletion requirements.in
Original file line number Diff line number Diff line change
Expand Up @@ -113,5 +113,5 @@ xmlsec==1.3.13 # Do not change this version - it will break SAML
lxml==4.9.4 # Do not change this version - it will break SAML
grpcio~=1.63.2 # Version constrained so that `deepeval` can be installed in dev
tenacity~=8.4.2 # Version constrained so that `deepeval` can be installed in dev
anthropic==0.40.0
anthropic==0.42.0
beautifulsoup4==4.12.3
2 changes: 1 addition & 1 deletion requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -25,7 +25,7 @@ amqp==5.1.1
# via kombu
annotated-types==0.7.0
# via pydantic
anthropic==0.40.0
anthropic==0.42.0
# via -r requirements.in
antlr4-python3-runtime==4.13.1
# via -r requirements.in
Expand Down

0 comments on commit ab99ce4

Please sign in to comment.