Commit a53bdb5
update openai and sealion usage costs
devxpy committed Jul 1, 2024
1 parent 28df481 commit a53bdb5
Showing 10 changed files with 194 additions and 83 deletions.
6 changes: 5 additions & 1 deletion bots/admin_links.py
@@ -10,7 +10,11 @@
 from daras_ai_v2 import settings
 
 
-def open_in_new_tab(url: str, *, label: str = "", add_related_url: str = None) -> str:
+def open_in_new_tab(
+    url: str | None, *, label: str = "", add_related_url: str = None
+) -> str | None:
+    if not url:
+        return None
     label = re.sub(r"https?://", "", label)
     context = {
         "url": url,
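The helper is now None-safe: a missing URL returns None instead of rendering a dead link. A minimal usage sketch (the caller and `get_app_url` below are hypothetical, not from this diff):

    sr = None  # an optional related object that may be absent
    url = sr and sr.get_app_url()  # stays None when sr is None
    html = open_in_new_tab(url, label="view run")  # -> None, safe to omit from the page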
66 changes: 50 additions & 16 deletions daras_ai_v2/language_model.py
@@ -18,6 +18,7 @@
 from openai.types.chat import (
     ChatCompletionContentPartParam,
     ChatCompletionChunk,
+    ChatCompletion,
 )
 
 from daras_ai.image_input import gs_url_to_uri, bytes_to_cv2_img, cv2_img_to_bytes
@@ -41,6 +42,8 @@
 # nice for showing streaming progress
 SUPERSCRIPT = str.maketrans("0123456789", "⁰¹²³⁴⁵⁶⁷⁸⁹")
 
+AZURE_OPENAI_MODEL_PREFIX = "openai-"
+
 
 class LLMApis(Enum):
     palm2 = 1
@@ -636,6 +639,9 @@ def _run_self_hosted_chat(
     avoid_repetition: bool,
     stop: list[str] | None,
 ) -> list[dict]:
+    from usage_costs.cost_utils import record_cost_auto
+    from usage_costs.models import ModelSku
+
     # sea lion doesnt support system prompt
     if model == LargeLanguageModels.sea_lion_7b_instruct.model_id:
         for i, entry in enumerate(messages):
@@ -656,6 +662,19 @@
             repetition_penalty=1.15 if avoid_repetition else 1,
         ),
     )
+
+    if usage := ret.get("usage"):
+        record_cost_auto(
+            model=model,
+            sku=ModelSku.llm_prompt,
+            quantity=usage["prompt_tokens"],
+        )
+        record_cost_auto(
+            model=model,
+            sku=ModelSku.llm_completion,
+            quantity=usage["completion_tokens"],
+        )
+
     return [
        {
            "role": CHATML_ROLE_ASSISTANT,
@@ -757,7 +776,7 @@ def _run_openai_chat(
         presence_penalty = 0
     if isinstance(model, str):
         model = [model]
-    r, used_model = try_all(
+    completion, used_model = try_all(
         *[
             _get_chat_completions_create(
                 model=model_str,
@@ -780,10 +799,10 @@
         ],
     )
     if stream:
-        return _stream_openai_chunked(r, used_model, messages)
+        return _stream_openai_chunked(completion, used_model, messages)
     else:
-        ret = [choice.message.dict() for choice in r.choices]
-        record_openai_llm_usage(used_model, messages, ret)
+        ret = [choice.message.dict() for choice in completion.choices]
+        record_openai_llm_usage(used_model, completion, messages, ret)
         return ret
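`try_all` itself is not part of this diff; a plausible sketch of its assumed semantics, calling each zero-argument candidate in order (here each returns a `(completion, used_model)` pair) and re-raising the last error if every candidate fails:

    def try_all(*fns):
        # assumes at least one candidate is supplied
        last_exc = None
        for fn in fns:
            try:
                return fn()
            except Exception as e:  # e.g. a regional outage or rate limit
                last_exc = e
        raise last_exc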


@@ -809,6 +828,7 @@ def _stream_openai_chunked(
     ret = []
     chunk_size = start_chunk_size
 
+    completion_chunk = None
     for completion_chunk in r:
         changed = False
         for choice in completion_chunk.choices:
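Pre-binding `completion_chunk = None` is a sentinel for an empty stream: without it, the variable would be undefined after a loop that never ran, and the usage-recording step further down could not bail out cleanly. The pattern in isolation:

    last = None
    for last in iter([]):  # an empty stream
        pass
    if last is None:
        print("no chunks seen; skip usage recording")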
@@ -860,28 +880,42 @@
             entry["content"] += entry["chunk"]
     yield ret
 
-    record_openai_llm_usage(used_model, messages, ret)
+    if not completion_chunk:
+        return
+    record_openai_llm_usage(used_model, completion_chunk, messages, ret)
 
 
 def record_openai_llm_usage(
-    used_model: str, messages: list[ConversationEntry], choices: list[ConversationEntry]
+    model: str,
+    completion: ChatCompletion | ChatCompletionChunk,
+    messages: list[ConversationEntry],
+    choices: list[ConversationEntry],
 ):
     from usage_costs.cost_utils import record_cost_auto
     from usage_costs.models import ModelSku
 
+    if completion.usage:
+        prompt_tokens = completion.usage.prompt_tokens
+        completion_tokens = completion.usage.completion_tokens
+    else:
+        prompt_tokens = sum(
+            default_length_function(get_entry_text(entry), model=completion.model)
+            for entry in messages
+        )
+        completion_tokens = sum(
+            default_length_function(get_entry_text(entry), model=completion.model)
+            for entry in choices
+        )
+
     record_cost_auto(
-        model=used_model,
+        model=model,
         sku=ModelSku.llm_prompt,
-        quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in messages
-        ),
+        quantity=prompt_tokens,
     )
     record_cost_auto(
-        model=used_model,
+        model=model,
         sku=ModelSku.llm_completion,
-        quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in choices
-        ),
+        quantity=completion_tokens,
    )
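When the API reports a `usage` object, the exact counts are recorded; otherwise tokens are estimated locally with tiktoken via `default_length_function`. (For streamed responses, `usage` typically arrives only on the final chunk, and in newer OpenAI API versions only when explicitly requested, hence the estimation fallback.) A standalone sketch of that fallback, simplified and assuming tiktoken is installed:

    import tiktoken

    def estimate_tokens(texts: list[str], model: str = "gpt-4") -> int:
        try:
            enc = tiktoken.encoding_for_model(model)
        except KeyError:  # unknown model name
            enc = tiktoken.get_encoding("cl100k_base")
        return sum(len(enc.encode(t)) for t in texts)

    print(estimate_tokens(["hello world"]))  # -> 2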


@@ -928,14 +962,14 @@ def _run_openai_text(
 def get_openai_client(model: str):
     import openai
 
-    if "-ca-" in model:
+    if model.startswith(AZURE_OPENAI_MODEL_PREFIX) and "-ca-" in model:
         client = openai.AzureOpenAI(
             api_key=settings.AZURE_OPENAI_KEY_CA,
             azure_endpoint=settings.AZURE_OPENAI_ENDPOINT_CA,
             api_version="2023-10-01-preview",
             max_retries=0,
         )
-    elif "-eastus2-" in model:
+    elif model.startswith(AZURE_OPENAI_MODEL_PREFIX) and "-eastus2-" in model:
         client = openai.AzureOpenAI(
             api_key=settings.AZURE_OPENAI_KEY_EASTUS2,
             azure_endpoint=settings.AZURE_OPENAI_ENDPOINT_EASTUS2,
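With the new prefix guard, only Azure-hosted model IDs (which evidently start with "openai-") are routed by region substring; other IDs presumably fall through to the plain OpenAI client in the unchanged branch below. Hypothetical IDs illustrating the rule (the real deployment names aren't shown in this diff):

    get_openai_client("openai-gpt-4-turbo-ca-1")   # Azure client, CA credentials
    get_openai_client("openai-gpt-4o-eastus2-1")   # Azure client, eastus2 credentials
    get_openai_client("sealion-7b-ca-hosted")      # no "openai-" prefix: not Azure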
4 changes: 2 additions & 2 deletions daras_ai_v2/text_splitter.py
@@ -40,11 +40,11 @@
 threadlocal = threading.local()
 
 
-def default_length_function(text: str) -> int:
+def default_length_function(text: str, model: str = "gpt-4") -> int:
     try:
         enc = threadlocal.enc
     except AttributeError:
-        enc = tiktoken.encoding_for_model("gpt-4")
+        enc = tiktoken.encoding_for_model(model)
         threadlocal.enc = enc
     return len(enc.encode(text))
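One caveat worth noting: the thread-local cache stores a single encoder, so the first `model` seen on a thread wins and later calls with a different model silently reuse that encoding. An alternative sketch (not from this commit) that caches one encoder per model name:

    import threading
    import tiktoken

    _local = threading.local()

    def length_fn(text: str, model: str = "gpt-4") -> int:
        cache = getattr(_local, "enc_cache", None)
        if cache is None:
            cache = _local.enc_cache = {}
        try:
            enc = cache[model]
        except KeyError:
            enc = cache[model] = tiktoken.encoding_for_model(model)
        return len(enc.encode(text))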

