Commit a53bdb5
update openai and sealion usage costs
devxpy committed Jul 1, 2024
1 parent 28df481 commit a53bdb5
Showing 10 changed files with 194 additions and 83 deletions.
6 changes: 5 additions & 1 deletion bots/admin_links.py
@@ -10,7 +10,11 @@
 from daras_ai_v2 import settings
 
 
-def open_in_new_tab(url: str, *, label: str = "", add_related_url: str = None) -> str:
+def open_in_new_tab(
+    url: str | None, *, label: str = "", add_related_url: str = None
+) -> str | None:
+    if not url:
+        return None
     label = re.sub(r"https?://", "", label)
     context = {
         "url": url,
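The helper is now None-safe: a missing URL returns None instead of rendering a dead link. A minimal usage sketch (the caller and `get_app_url` below are hypothetical, not from this diff):

    sr = None  # an optional related object that may be absent
    url = sr and sr.get_app_url()  # stays None when sr is None
    html = open_in_new_tab(url, label="view run")  # -> None, safe to omit from the page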
66 changes: 50 additions & 16 deletions daras_ai_v2/language_model.py
@@ -18,6 +18,7 @@
 from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionChunk,
+    ChatCompletion,
 )
 
 from daras_ai.image_input import gs_url_to_uri, bytes_to_cv2_img, cv2_img_to_bytes
@@ -41,6 +42,8 @@
 # nice for showing streaming progress
 SUPERSCRIPT = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
 
+AZURE_OPENAI_MODEL_PREFIX = "openai-"
+
 
 class LLMApis(Enum):
     palm2 = 1
@@ -636,6 +639,9 @@ def _run_self_hosted_chat(
     avoid_repetition: bool,
     stop: list[str] | None,
 ) -> list[dict]:
+    from usage_costs.cost_utils import record_cost_auto
+    from usage_costs.models import ModelSku
+
     # sea lion doesnt support system prompt
     if model == LargeLanguageModels.sea_lion_7b_instruct.model_id:
         for i, entry in enumerate(messages):
@@ -656,6 +662,19 @@
             repetition_penalty=1.15 if avoid_repetition else 1,
         ),
     )
+
+    if usage := ret.get("usage"):
+        record_cost_auto(
+            model=model,
+            sku=ModelSku.llm_prompt,
+            quantity=usage["prompt_tokens"],
+        )
+        record_cost_auto(
+            model=model,
+            sku=ModelSku.llm_completion,
+            quantity=usage["completion_tokens"],
+        )
+
     return [
        {
            "role": CHATML_ROLE_ASSISTANT,
@@ -757,7 +776,7 @@ def _run_openai_chat(
         presence_penalty = 0
     if isinstance(model, str):
         model = [model]
-    r, used_model = try_all(
+    completion, used_model = try_all(
         *[
             _get_chat_completions_create(
                 model=model_str,
@@ -780,10 +799,10 @@
         ],
     )
     if stream:
-        return _stream_openai_chunked(r, used_model, messages)
+        return _stream_openai_chunked(completion, used_model, messages)
     else:
-        ret = [choice.message.dict() for choice in r.choices]
-        record_openai_llm_usage(used_model, messages, ret)
+        ret = [choice.message.dict() for choice in completion.choices]
+        record_openai_llm_usage(used_model, completion, messages, ret)
         return ret
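`try_all` itself is not part of this diff; a plausible sketch of its assumed semantics, calling each zero-argument candidate in order (here each returns a `(completion, used_model)` pair) and re-raising the last error if every candidate fails:

    def try_all(*fns):
        # assumes at least one candidate is supplied
        last_exc = None
        for fn in fns:
            try:
                return fn()
            except Exception as e:  # e.g. a regional outage or rate limit
                last_exc = e
        raise last_exc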


@@ -809,6 +828,7 @@ def _stream_openai_chunked(
     ret = []
     chunk_size = start_chunk_size
 
+    completion_chunk = None
     for completion_chunk in r:
         changed = False
         for choice in completion_chunk.choices:
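Pre-binding `completion_chunk = None` is a sentinel for an empty stream: without it, the variable would be undefined after a loop that never ran, and the usage-recording step further down could not bail out cleanly. The pattern in isolation:

    last = None
    for last in iter([]):  # an empty stream
        pass
    if last is None:
        print("no chunks seen; skip usage recording")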
@@ -860,28 +880,42 @@
             entry["content"] += entry["chunk"]
     yield ret
 
-    record_openai_llm_usage(used_model, messages, ret)
+    if not completion_chunk:
+        return
+    record_openai_llm_usage(used_model, completion_chunk, messages, ret)
 
 
 def record_openai_llm_usage(
-    used_model: str, messages: list[ConversationEntry], choices: list[ConversationEntry]
+    model: str,
+    completion: ChatCompletion | ChatCompletionChunk,
+    messages: list[ConversationEntry],
+    choices: list[ConversationEntry],
 ):
     from usage_costs.cost_utils import record_cost_auto
     from usage_costs.models import ModelSku
 
+    if completion.usage:
+        prompt_tokens = completion.usage.prompt_tokens
+        completion_tokens = completion.usage.completion_tokens
+    else:
+        prompt_tokens = sum(
+            default_length_function(get_entry_text(entry), model=completion.model)
+            for entry in messages
+        )
+        completion_tokens = sum(
+            default_length_function(get_entry_text(entry), model=completion.model)
+            for entry in choices
+        )
+
     record_cost_auto(
-        model=used_model,
+        model=model,
         sku=ModelSku.llm_prompt,
-        quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in messages
-        ),
+        quantity=prompt_tokens,
     )
     record_cost_auto(
-        model=used_model,
+        model=model,
         sku=ModelSku.llm_completion,
-        quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in choices
-        ),
+        quantity=completion_tokens,
    )
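When the API reports a `usage` object, the exact counts are recorded; otherwise tokens are estimated locally with tiktoken via `default_length_function`. (For streamed responses, `usage` typically arrives only on the final chunk, and in newer OpenAI API versions only when explicitly requested, hence the estimation fallback.) A standalone sketch of that fallback, simplified and assuming tiktoken is installed:

    import tiktoken

    def estimate_tokens(texts: list[str], model: str = "gpt-4") -> int:
        try:
            enc = tiktoken.encoding_for_model(model)
        except KeyError:  # unknown model name
            enc = tiktoken.get_encoding("cl100k_base")
        return sum(len(enc.encode(t)) for t in texts)

    print(estimate_tokens(["hello world"]))  # -> 2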


@@ -928,14 +962,14 @@ def _run_openai_text(
 def get_openai_client(model: str):
     import openai
 
-    if "-ca-" in model:
+    if model.startswith(AZURE_OPENAI_MODEL_PREFIX) and "-ca-" in model:
         client = openai.AzureOpenAI(
             api_key=settings.AZURE_OPENAI_KEY_CA,
             azure_endpoint=settings.AZURE_OPENAI_ENDPOINT_CA,
             api_version="2023-10-01-preview",
             max_retries=0,
         )
-    elif "-eastus2-" in model:
+    elif model.startswith(AZURE_OPENAI_MODEL_PREFIX) and "-eastus2-" in model:
         client = openai.AzureOpenAI(
             api_key=settings.AZURE_OPENAI_KEY_EASTUS2,
             azure_endpoint=settings.AZURE_OPENAI_ENDPOINT_EASTUS2,
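With the new prefix guard, only Azure-hosted model IDs (which evidently start with "openai-") are routed by region substring; other IDs presumably fall through to the plain OpenAI client in the unchanged branch below. Hypothetical IDs illustrating the rule (the real deployment names aren't shown in this diff):

    get_openai_client("openai-gpt-4-turbo-ca-1")   # Azure client, CA credentials
    get_openai_client("openai-gpt-4o-eastus2-1")   # Azure client, eastus2 credentials
    get_openai_client("sealion-7b-ca-hosted")      # no "openai-" prefix: not Azure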
4 changes: 2 additions & 2 deletions daras_ai_v2/text_splitter.py
@@ -40,11 +40,11 @@
 threadlocal = threading.local()
 
 
-def default_length_function(text: str) -> int:
+def default_length_function(text: str, model: str = "gpt-4") -> int:
     try:
         enc = threadlocal.enc
     except AttributeError:
-        enc = tiktoken.encoding_for_model("gpt-4")
+        enc = tiktoken.encoding_for_model(model)
         threadlocal.enc = enc
     return len(enc.encode(text))
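One caveat worth noting: the thread-local cache stores a single encoder, so the first `model` seen on a thread wins and later calls with a different model silently reuse that encoding. An alternative sketch (not from this commit) that caches one encoder per model name:

    import threading
    import tiktoken

    _local = threading.local()

    def length_fn(text: str, model: str = "gpt-4") -> int:
        cache = getattr(_local, "enc_cache", None)
        if cache is None:
            cache = _local.enc_cache = {}
        try:
            enc = cache[model]
        except KeyError:
            enc = cache[model] = tiktoken.encoding_for_model(model)
        return len(enc.encode(text))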

