-
Notifications
You must be signed in to change notification settings - Fork 27
/
llm.py
160 lines (134 loc) · 4.44 KB
/
llm.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
from typing import Dict
from collections import defaultdict
import threading
from fastapi import HTTPException
from utils import getenv
import backoff
import openai.error
import litellm
import os
import litellm.exceptions
from litellm.caching import Cache
# litellm.cache = Cache( # optional if you want to use cache
# type="redis",
# host=getenv("REDISHOST", ""),
# port=getenv("REDISPORT", ""),
# password=getenv("REDISPASSWORD", ""),
# )
# Per-user cost ledger: user key -> {model name: accumulated cost}.
# cost_dict_lock guards cross-thread access to it.
# NOTE(review): neither name is referenced anywhere in this file's visible
# code -- possibly used by another module, or dead state; confirm before removing.
cost_dict: Dict[str, Dict[str, float]] = defaultdict(dict)
cost_dict_lock = threading.Lock()
def _update_costs_thread(budget_manager: litellm.BudgetManager):
    """Persist the budget manager's cost data on a background thread.

    Fire-and-forget: the worker thread is started but never joined, so
    the caller is not blocked while the data is being saved.
    """
    worker = threading.Thread(target=budget_manager.save_data)
    worker.start()
class RetryConstantError(Exception):
    """Transient LLM failure that should be retried at a fixed interval."""
class RetryExpoError(Exception):
    """Rate-limit failure that should be retried with exponential backoff."""
class UnknownLLMError(Exception):
    """Unexpected provider error that does not map to a known retry policy."""
def handle_llm_exception(e: Exception):
    """Map an openai/litellm exception onto this server's retry policy.

    - Transient server-side failures (APIError, TryAgain, Timeout,
      ServiceUnavailableError) -> RetryConstantError: retried at a fixed
      interval by the backoff decorator on completion().
    - RateLimitError -> RetryExpoError: retried with exponential backoff.
    - Client/configuration errors (connection, bad request, auth,
      permission, API type, signature) are re-raised unchanged --
      retrying cannot fix them.
    - Anything else -> UnknownLLMError.

    Always raises; never returns normally.

    Fix: the retry exceptions previously carried no message
    (`raise RetryConstantError from e` constructs an empty exception),
    so backoff logs lost the provider's error text. The original error
    message is now passed through.
    """
    if isinstance(
        e,
        (
            openai.error.APIError,
            openai.error.TryAgain,
            openai.error.Timeout,
            openai.error.ServiceUnavailableError,
        ),
    ):
        raise RetryConstantError(str(e)) from e
    elif isinstance(e, openai.error.RateLimitError):
        raise RetryExpoError(str(e)) from e
    elif isinstance(
        e,
        (
            openai.error.APIConnectionError,
            openai.error.InvalidRequestError,
            openai.error.AuthenticationError,
            openai.error.PermissionError,
            openai.error.InvalidAPIType,
            openai.error.SignatureVerificationError,
        ),
    ):
        # Non-retryable client-side errors: propagate as-is.
        raise e
    else:
        raise UnknownLLMError(str(e)) from e
@backoff.on_exception(
    wait_gen=backoff.constant,
    exception=RetryConstantError,
    max_tries=3,
    interval=3,
)
@backoff.on_exception(
    wait_gen=backoff.expo,
    exception=RetryExpoError,
    jitter=backoff.full_jitter,
    max_value=100,
    factor=1.5,
)
def completion(**kwargs) -> litellm.ModelResponse:
    """Run a litellm completion with retries and per-user budget enforcement.

    Consumes three extra kwargs (NOT forwarded to litellm):
        user_key: the caller's API key.
        master_key: the server admin key; admin calls skip budgeting.
        budget_manager: litellm.BudgetManager tracking per-user spend.
    All remaining kwargs are passed straight to litellm.completion().

    Raises:
        HTTPException(429): when the user's current cost exceeds their budget.
        RetryConstantError / RetryExpoError: transient failures, consumed by
            the backoff decorators above.
        UnknownLLMError: unexpected provider errors (via handle_llm_exception).
    """
    user_key = kwargs.pop("user_key")
    master_key = kwargs.pop("master_key")
    budget_manager: litellm.BudgetManager = kwargs.pop("budget_manager")

    def _completion():
        try:
            # Optional server-wide model override.
            default_model = os.getenv("DEFAULT_MODEL", None)
            if default_model is not None and default_model != "":
                kwargs["model"] = default_model
            if user_key == master_key:
                # use as admin of the server -- no budget enforcement
                response = litellm.completion(**kwargs)
            else:
                # for end user based rate limiting
                if budget_manager.get_current_cost(
                    user=user_key
                ) > budget_manager.get_total_budget(user=user_key):
                    raise HTTPException(
                        status_code=429, detail={"error": "budget exceeded"}
                    )
                response = litellm.completion(**kwargs)
            # Streaming responses are not costed here: total usage is not
            # known until the stream finishes. TODO(review): cost streams.
            if "stream" not in kwargs or kwargs["stream"] is not True:
                if user_key != master_key:  # no budget on master key
                    # Record the spend, then persist it asynchronously.
                    budget_manager.update_cost(completion_obj=response, user=user_key)
                    _update_costs_thread(budget_manager)  # Non-blocking
            return response
        except HTTPException:
            # Fix: deliberate HTTP errors (e.g. the 429 budget rejection
            # above) must reach the client unchanged. Previously they fell
            # into the blanket handler below and were wrapped into
            # UnknownLLMError by handle_llm_exception.
            raise
        except Exception as e:
            # Fix: do not print user_key/master_key here -- they are
            # secrets and were previously leaked into server logs.
            print(f"LiteLLM Server: Got exception {e}")
            handle_llm_exception(e)  # always raises

    # The previous `try: ... except Exception as e: raise e` wrapper was a
    # no-op and has been removed.
    return _completion()
# LiteLLM Config
# config = {
# "function": "completion",
# "default_fallback_models": ["gpt-3.5-turbo", "claude-instant-1", "j2-ultra"],
# "available_models": litellm.utils.get_valid_models(),
# "adapt_to_prompt_size": True,
# "model": {
# "claude-instant-1": {
# "needs_moderation": True
# },
# "claude-2": {
# "needs_moderation": True
# },
# "gpt-3.5-turbo": {
# "error_handling": {
# "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k"}
# }
# },
# "gpt-3.5-turbo-0613": {
# "error_handling": {
# "ContextWindowExceededError": {"fallback_model": "gpt-3.5-turbo-16k-0613"}
# }
# },
# "gpt-4": {
# "error_handling": {
# "ContextWindowExceededError": {"fallback_model": "claude-2"}
# }
# }
# }
# }