Update OpenAI and SEA-LION usage costs

GooeyAI · Jul 1, 2024 · 4c83b66 · 4c83b66
1 parent 28df481
commit 4c83b66
Show file tree

Hide file tree

Showing 6 changed files with 77 additions and 47 deletions.
diff --git a/daras_ai_v2/language_model.py b/daras_ai_v2/language_model.py
@@ -19,6 +19,7 @@
     ChatCompletionContentPartParam,
     ChatCompletionChunk,
 )
+from openai.types.completion_usage import CompletionUsage
 
 from daras_ai.image_input import gs_url_to_uri, bytes_to_cv2_img, cv2_img_to_bytes
 from daras_ai_v2.asr import get_google_auth_session
@@ -636,6 +637,9 @@ def _run_self_hosted_chat(
     avoid_repetition: bool,
     stop: list[str] | None,
 ) -> list[dict]:
+    from usage_costs.cost_utils import record_cost_auto
+    from usage_costs.models import ModelSku
+
     # SEA-LION doesn't support a system prompt
     if model == LargeLanguageModels.sea_lion_7b_instruct.model_id:
         for i, entry in enumerate(messages):
@@ -656,6 +660,19 @@ def _run_self_hosted_chat(
             repetition_penalty=1.15 if avoid_repetition else 1,
         ),
     )
+
+    if usage := ret.get("usage"):
+        record_cost_auto(
+            model=model,
+            sku=ModelSku.llm_prompt,
+            quantity=usage["prompt_tokens"],
+        )
+        record_cost_auto(
+            model=model,
+            sku=ModelSku.llm_completion,
+            quantity=usage["completion_tokens"],
+        )
+
     return [
         {
             "role": CHATML_ROLE_ASSISTANT,
@@ -757,7 +774,7 @@ def _run_openai_chat(
         presence_penalty = 0
     if isinstance(model, str):
         model = [model]
-    r, used_model = try_all(
+    completion, used_model = try_all(
         *[
             _get_chat_completions_create(
                 model=model_str,
@@ -780,11 +797,10 @@ def _run_openai_chat(
         ],
     )
     if stream:
-        return _stream_openai_chunked(r, used_model, messages)
+        return _stream_openai_chunked(completion, used_model, messages)
     else:
-        ret = [choice.message.dict() for choice in r.choices]
-        record_openai_llm_usage(used_model, messages, ret)
-        return ret
+        record_openai_llm_usage(used_model, completion.usage)
+        return [choice.message.dict() for choice in completion.choices]
 
 
 def _get_chat_completions_create(model: str, **kwargs):
@@ -854,34 +870,31 @@ def _stream_openai_chunked(
                 break
         if changed:
             yield ret
+        if completion_chunk.usage:
+            record_openai_llm_usage(used_model, completion_chunk.usage)
 
     # add the leftover chunks
     for entry in ret:
         entry["content"] += entry["chunk"]
     yield ret
 
-    record_openai_llm_usage(used_model, messages, ret)
-
 
 def record_openai_llm_usage(
-    used_model: str, messages: list[ConversationEntry], choices: list[ConversationEntry]
+    used_model,
+    usage: CompletionUsage,
 ):
     from usage_costs.cost_utils import record_cost_auto
     from usage_costs.models import ModelSku
 
     record_cost_auto(
         model=used_model,
         sku=ModelSku.llm_prompt,
-        quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in messages
-        ),
+        quantity=usage.prompt_tokens,
     )
     record_cost_auto(
         model=used_model,
         sku=ModelSku.llm_completion,
-        quantity=sum(
-            default_length_function(get_entry_text(entry)) for entry in choices
-        ),
+        quantity=usage.completion_tokens,
     )
 
 

diff --git a/poetry.lock b/poetry.lock
diff --git a/pyproject.toml b/pyproject.toml
@@ -9,7 +9,7 @@ package-mode = false
 [tool.poetry.dependencies]
 python = ">=3.10,<3.13"
 streamlit = "^1.15.2"
-openai = "^1.2.0"
+openai = "^1.35.7"
 python-decouple = "^3.6"
 requests = "^2.28.1"
 glom = "^22.1.0"

diff --git a/scripts/init_llm_pricing.py b/scripts/init_llm_pricing.py
@@ -5,6 +5,27 @@
 
 
 def run():
+    # GPT-4o
+
+    llm_pricing_create(
+        model_id="gpt-4o",
+        model_name=LargeLanguageModels.gpt_4_o.name,
+        unit_cost_input=5,
+        unit_cost_output=15,
+        unit_quantity=10**6,
+        provider=ModelProvider.openai,
+        pricing_url="https://openai.com/pricing",
+    )
+    llm_pricing_create(
+        model_id="openai-gpt-4o-prod-eastus2-1",
+        model_name=LargeLanguageModels.gpt_4_o.name,
+        unit_cost_input=5,
+        unit_cost_output=15,
+        unit_quantity=10**6,
+        provider=ModelProvider.azure_openai,
+        pricing_url="https://azure.microsoft.com/en-us/pricing/details/cognitive-services/openai-service/",
+    )
+
     # GPT-4-Turbo
 
     for model in ["gpt-4-0125-preview", "gpt-4-1106-preview"]:
@@ -575,6 +596,18 @@ def run():
         pricing_url="https://docs.anthropic.com/claude/docs/models-overview#model-comparison",
     )
 
+    # SEA-LION
+
+    llm_pricing_create(
+        model_id="aisingapore/sea-lion-7b-instruct",
+        model_name=LargeLanguageModels.sea_lion_7b_instruct.name,
+        unit_cost_input=5,
+        unit_cost_output=15,
+        unit_quantity=10**6,
+        provider=ModelProvider.aks,
+        notes="Same as GPT-4o. Note that the actual cost of this model is in GPU Milliseconds",
+    )
+
 
 def llm_pricing_create(
     model_id: str,
@@ -583,7 +616,8 @@ def llm_pricing_create(
     unit_cost_output: float,
     unit_quantity: int,
     provider: ModelProvider,
-    pricing_url: str,
+    pricing_url: str = "",
+    notes: str = "",
 ):
     obj, created = ModelPricing.objects.get_or_create(
         model_id=model_id,
@@ -609,6 +643,7 @@ def llm_pricing_create(
             category=category,
             provider=provider,
             pricing_url=pricing_url,
+            notes=notes,
         ),
     )
     if created:

diff --git a/usage_costs/admin.py b/usage_costs/admin.py
@@ -1,13 +1,14 @@
 from django.contrib import admin
 
 from bots.admin_links import open_in_new_tab, change_obj_url
+from daras_ai.text_format import format_number_with_suffix
 from usage_costs import models
 
 
 class CostQtyMixin:
     @admin.display(description="Cost / Qty", ordering="unit_cost")
-    def cost_qty(self, obj):
-        return f"${obj.unit_cost.normalize()} / {obj.unit_quantity}"
+    def cost_qty(self, obj: models.ModelPricing | models.UsageCost):
+        return f"${obj.unit_cost.normalize()} / {format_number_with_suffix(obj.unit_quantity)}"
 
 
 @admin.register(models.UsageCost)

diff --git a/usage_costs/models.py b/usage_costs/models.py
@@ -89,7 +89,7 @@ class ModelPricing(models.Model):
     unit_cost = models.DecimalField(
         max_digits=max_digits,
         decimal_places=decimal_places,
-        help_text="The cost per unit.",
+        help_text="The cost per unit (in dollars).",
     )
     unit_quantity = models.PositiveIntegerField(
         help_text="The quantity of the unit. (e.g. 1000 tokens)", default=1