JSON format on doc_search, doc_summary, and google_gpt #405

Merged · 3 commits · Jul 22, 2024
16 changes: 9 additions & 7 deletions daras_ai_v2/doc_search_settings_widgets.py
@@ -10,7 +10,6 @@
from daras_ai_v2.embedding_model import EmbeddingModels
from daras_ai_v2.enum_selector_widget import enum_selector
from daras_ai_v2.gdrive_downloader import gdrive_list_urls_of_files_in_folder
from daras_ai_v2.prompt_vars import variables_input
from daras_ai_v2.search_ref import CitationStyles

_user_media_url_prefix = os.path.join(
@@ -171,8 +170,10 @@ def doc_search_advanced_settings():
st.number_input(
label="""
###### Max Snippet Words

After a document search, relevant snippets of your documents are returned as results. This setting adjusts the maximum number of words in each snippet. A high snippet size allows the LLM to access more information from your document results, at the cost of being verbose and potentially exhausting input tokens (which can cause a failure of the copilot to respond). Default: 300
After a document search, relevant snippets of your documents are returned as results.
This setting adjusts the maximum number of words in each snippet (tokens = words * 2).
A high snippet size allows the LLM to access more information from your document results, \
at the cost of being verbose and potentially exhausting input tokens (which can cause a failure of the copilot to respond).
""",
key="max_context_words",
min_value=10,
@@ -181,9 +182,10 @@

st.number_input(
label="""
###### Overlapping Snippet Lines
Your knowledge base documents are split into overlapping snippets. This settings adjusts how much those snippets overlap. In general you shouldn't need to adjust this. Default: 5

###### Snippet Overlap Ratio
Your knowledge base documents are split into overlapping snippets.
This setting adjusts how much those snippets overlap (overlap tokens = snippet tokens / overlap ratio).
In general you shouldn't need to adjust this.
""",
key="scroll_jump",
min_value=1,
@@ -194,7 +196,7 @@ def embeddings_model_selector(key: str):
def embeddings_model_selector(key: str):
return enum_selector(
EmbeddingModels,
label="##### Embeddings Model",
label="##### Embeddings Model",
key=key,
use_selectbox=True,
)
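
The two help texts above encode the sizing rules as tokens = words * 2 and overlap tokens = snippet tokens / overlap ratio. A quick illustrative sketch of that arithmetic — the function name and defaults here are ours, not part of this PR:

```python
# Back-of-the-envelope snippet sizing, following the widget help text above.
# Assumption: 1 word ~ 2 tokens, per the "(tokens = words * 2)" note.

def snippet_budget(max_context_words: int = 300, overlap_ratio: int = 5) -> tuple[int, int]:
    """Return (snippet_tokens, overlap_tokens) for the given settings."""
    snippet_tokens = max_context_words * 2            # tokens = words * 2
    overlap_tokens = snippet_tokens // overlap_ratio  # overlap = tokens / ratio
    return snippet_tokens, overlap_tokens

# With the old defaults (300 words, ratio 5): 600 tokens per snippet, 120 overlapping.
print(snippet_budget())  # (600, 120)
```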
44 changes: 41 additions & 3 deletions daras_ai_v2/language_model.py
@@ -33,6 +33,10 @@
from functions.recipe_functions import LLMTools

DEFAULT_SYSTEM_MSG = "You are an intelligent AI assistant. Follow the instructions as closely as possible."
DEFAULT_JSON_PROMPT = (
"Please respond directly in JSON format. "
"Don't output markdown or HTML, instead print the JSON object directly without formatting."
)

CHATML_ROLE_SYSTEM = "system"
CHATML_ROLE_ASSISTANT = "assistant"
@@ -65,6 +69,7 @@ class LLMSpec(typing.NamedTuple):
is_chat_model: bool = True
is_vision_model: bool = False
is_deprecated: bool = False
supports_json: bool = False


class LargeLanguageModels(Enum):
@@ -76,6 +81,7 @@ class LargeLanguageModels(Enum):
context_window=128_000,
price=10,
is_vision_model=True,
supports_json=True,
)
# https://platform.openai.com/docs/models/gpt-4o-mini
gpt_4_o_mini = LLMSpec(
@@ -85,6 +91,7 @@ class LargeLanguageModels(Enum):
context_window=128_000,
price=1,
is_vision_model=True,
supports_json=True,
)
# https://platform.openai.com/docs/models/gpt-4-turbo-and-gpt-4
gpt_4_turbo_vision = LLMSpec(
@@ -97,6 +104,7 @@ class LargeLanguageModels(Enum):
context_window=128_000,
price=6,
is_vision_model=True,
supports_json=True,
)
gpt_4_vision = LLMSpec(
label="GPT-4 Vision (openai) 🔻",
@@ -114,6 +122,7 @@ class LargeLanguageModels(Enum):
llm_api=LLMApis.openai,
context_window=128_000,
price=5,
supports_json=True,
)

# https://platform.openai.com/docs/models/gpt-4
@@ -139,6 +148,7 @@ class LargeLanguageModels(Enum):
llm_api=LLMApis.openai,
context_window=4096,
price=1,
supports_json=True,
)
gpt_3_5_turbo_16k = LLMSpec(
label="ChatGPT 16k (openai)",
@@ -163,27 +173,31 @@ class LargeLanguageModels(Enum):
llm_api=LLMApis.groq,
context_window=8192,
price=1,
supports_json=True,
)
llama_3_groq_70b_tool_use = LLMSpec(
label="Llama 3 Groq 70b Tool Use",
model_id="llama3-groq-70b-8192-tool-use-preview",
llm_api=LLMApis.groq,
context_window=8192,
price=1,
supports_json=True,
)
llama3_8b = LLMSpec(
label="Llama 3 8b (Meta AI)",
model_id="llama3-8b-8192",
llm_api=LLMApis.groq,
context_window=8192,
price=1,
supports_json=True,
)
llama_3_groq_8b_tool_use = LLMSpec(
label="Llama 3 Groq 8b Tool Use",
model_id="llama3-groq-8b-8192-tool-use-preview",
llm_api=LLMApis.groq,
context_window=8192,
price=1,
supports_json=True,
)
llama2_70b_chat = LLMSpec(
label="Llama 2 70b Chat [Deprecated] (Meta AI)",
@@ -199,20 +213,23 @@ class LargeLanguageModels(Enum):
llm_api=LLMApis.groq,
context_window=32_768,
price=1,
supports_json=True,
)
gemma_2_9b_it = LLMSpec(
label="Gemma 2 9B (Google)",
model_id="gemma2-9b-it",
llm_api=LLMApis.groq,
context_window=8_192,
price=1,
supports_json=True,
)
gemma_7b_it = LLMSpec(
label="Gemma 7B (Google)",
model_id="gemma-7b-it",
llm_api=LLMApis.groq,
context_window=8_192,
price=1,
supports_json=True,
)

# https://cloud.google.com/vertex-ai/docs/generative-ai/learn/models
@@ -358,6 +375,7 @@ def __init__(self, *args):
self.is_deprecated = spec.is_deprecated
self.is_chat_model = spec.is_chat_model
self.is_vision_model = spec.is_vision_model
self.supports_json = spec.supports_json

@property
def value(self):
@@ -431,18 +449,34 @@ def run_language_model(

model: LargeLanguageModels = LargeLanguageModels[str(model)]
if model.is_chat_model:
if not messages:
if prompt and not messages:
# convert text prompt to chat messages
messages = [
{"role": "system", "content": DEFAULT_SYSTEM_MSG},
{"role": "user", "content": prompt},
format_chat_entry(role=CHATML_ROLE_SYSTEM, content=DEFAULT_SYSTEM_MSG),
format_chat_entry(role=CHATML_ROLE_USER, content=prompt),
]
if not model.is_vision_model:
# remove images from the messages
messages = [
format_chat_entry(role=entry["role"], content=get_entry_text(entry))
for entry in messages
]
if (
messages
and response_format_type == "json_object"
and "JSON" not in str(messages).upper()
):
if messages[0]["role"] != CHATML_ROLE_SYSTEM:
messages.insert(
0,
format_chat_entry(
role=CHATML_ROLE_SYSTEM, content=DEFAULT_JSON_PROMPT
),
)
else:
messages[0]["content"] = "\n\n".join(
[get_entry_text(messages[0]), DEFAULT_JSON_PROMPT]
)
entries = _run_chat_model(
api=model.llm_api,
model=model.model_id,
@@ -628,6 +662,7 @@ def _run_chat_model(
max_tokens=max_tokens,
temperature=temperature,
avoid_repetition=avoid_repetition,
response_format_type=response_format_type,
stop=stop,
)
case LLMApis.anthropic:
@@ -1025,6 +1060,7 @@ def _run_groq_chat(
temperature: float,
avoid_repetition: bool,
stop: list[str] | None,
response_format_type: ResponseFormatType | None,
):
from usage_costs.cost_utils import record_cost_auto
from usage_costs.models import ModelSku
@@ -1040,6 +1076,8 @@
data["presence_penalty"] = 0.25
if stop:
data["stop"] = stop
if response_format_type:
data["response_format"] = {"type": response_format_type}
r = requests.post(
"https://api.groq.com/openai/v1/chat/completions",
json=data,
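
The net effect of the language_model.py changes: each LLMSpec gains a supports_json capability flag, and when a caller passes response_format_type="json_object" but "JSON" never appears in the messages, DEFAULT_JSON_PROMPT is prepended as (or merged into) the system message before dispatch — Groq now also receives the response_format payload. A hedged sketch of a call site; argument values beyond the names visible in this diff are assumptions:

```python
# Illustrative call only; kwargs of run_language_model beyond those
# shown in this diff are assumptions.
from daras_ai_v2.language_model import run_language_model, LargeLanguageModels

entries = run_language_model(
    model=LargeLanguageModels.gpt_4_o.name,  # supports_json=True per the enum above
    prompt="List the three largest moons of Jupiter with their radii in km.",
    response_format_type="json_object",
)
# "JSON" never appears in the prompt, so the new branch prepends
# DEFAULT_JSON_PROMPT as a system message before calling _run_chat_model.
```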
103 changes: 67 additions & 36 deletions daras_ai_v2/language_model_settings_widgets.py
@@ -1,49 +1,56 @@
from pydantic import BaseModel, Field

import gooey_ui as st
from daras_ai_v2.enum_selector_widget import enum_selector, BLANK_OPTION
from daras_ai_v2.field_render import field_title_desc
from daras_ai_v2.language_model import LargeLanguageModels, ResponseFormatType, LLMApis

from daras_ai_v2.enum_selector_widget import enum_selector
from daras_ai_v2.language_model import LargeLanguageModels

class LanguageModelSettings(BaseModel):
avoid_repetition: bool | None
num_outputs: int | None
quality: float | None
max_tokens: int | None
sampling_temperature: float | None
response_format_type: ResponseFormatType = Field(
None,
title="Response Format",
)

def language_model_settings(show_selector=True):
st.write("##### 🔠 Language Model Settings")

if show_selector:
enum_selector(
LargeLanguageModels,
label_visibility="collapsed",
key="selected_model",
use_selectbox=True,
)
def language_model_selector(
label: str = "##### 🔠 Language Model Settings",
label_visibility: str = "visible",
key: str = "selected_model",
):
return enum_selector(
LargeLanguageModels,
label=label,
label_visibility=label_visibility,
key=key,
use_selectbox=True,
)


st.checkbox("Avoid Repetition", key="avoid_repetition")
def language_model_settings(selected_model: str = None):
try:
llm = LargeLanguageModels[selected_model]
except KeyError:
llm = None

col1, col2 = st.columns(2)
with col1:
st.slider(
label="""
###### Answer Outputs
How many answers should the copilot generate? Additional answer outputs increase the cost of each run.
""",
key="num_outputs",
min_value=1,
max_value=4,
)
if (
show_selector
and not LargeLanguageModels[
st.session_state.get("selected_model") or LargeLanguageModels.gpt_4.name
].is_chat_model
):
st.checkbox("Avoid Repetition", key="avoid_repetition")
if not llm or llm.supports_json:
with col2:
st.slider(
label="""
###### Attempts
Generate multiple responses and choose the best one.
""",
key="quality",
min_value=1.0,
max_value=5.0,
step=0.1,
st.selectbox(
f"###### {field_title_desc(LanguageModelSettings, 'response_format_type')}",
options=[None, "json_object"],
key="response_format_type",
format_func={
None: BLANK_OPTION,
"json_object": "JSON Object",
}.__getitem__,
)

col1, col2 = st.columns(2)
@@ -68,3 +75,27 @@ def language_model_settings(show_selector=True):
min_value=0.0,
max_value=2.0,
)

col1, col2 = st.columns(2)
with col1:
st.slider(
label="""
###### Answer Outputs
How many answers should the copilot generate? Additional answer outputs increase the cost of each run.
""",
key="num_outputs",
min_value=1,
max_value=4,
)
if llm and not llm.is_chat_model and llm.llm_api == LLMApis.openai:
with col2:
st.slider(
label="""
###### Attempts
Generate multiple responses and choose the best one
""",
key="quality",
min_value=1.0,
max_value=5.0,
step=0.1,
)
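
With the widget split above, pages call language_model_selector() for the dropdown and pass its result into language_model_settings(), which renders the new "Response Format" selectbox only for JSON-capable models. A sketch of the wiring — the page function itself is hypothetical; only the two widget functions come from this diff:

```python
# Hypothetical call site after the refactor.
from daras_ai_v2.language_model_settings_widgets import (
    language_model_selector,
    language_model_settings,
)

def render_llm_settings():
    # Renders the model dropdown and writes to st.session_state["selected_model"].
    selected_model = language_model_selector()
    # Renders the sliders; the "Response Format" selectbox appears only when
    # the selected LLMSpec has supports_json=True (or no model is chosen yet).
    language_model_settings(selected_model)
```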
19 changes: 9 additions & 10 deletions recipes/BulkEval.py
@@ -22,6 +22,7 @@
run_language_model,
LargeLanguageModels,
)
from daras_ai_v2.language_model_settings_widgets import LanguageModelSettings
from daras_ai_v2.prompt_vars import render_prompt_vars
from recipes.BulkRunner import read_df_any, list_view_editor, del_button
from recipes.DocSearch import render_documents
@@ -48,15 +49,6 @@
]


class LLMSettingsMixin(BaseModel):
selected_model: typing.Literal[tuple(e.name for e in LargeLanguageModels)] | None
avoid_repetition: bool | None
num_outputs: int | None
quality: float | None
max_tokens: int | None
sampling_temperature: float | None


class EvalPrompt(typing.TypedDict):
name: str
prompt: str
@@ -168,7 +160,7 @@ def related_workflows(self) -> list:

return [BulkRunnerPage, VideoBotsPage, AsrPage, DocSearchPage]

class RequestModel(LLMSettingsMixin, BasePage.RequestModel):
class RequestModelBase(BasePage.RequestModel):
documents: list[str] = Field(
title="Input Data Spreadsheet",
description="""
Expand All @@ -193,6 +185,13 @@ class RequestModel(LLMSettingsMixin, BasePage.RequestModel):
""",
)

selected_model: (
typing.Literal[tuple(e.name for e in LargeLanguageModels)] | None
)

class RequestModel(LanguageModelSettings, RequestModelBase):
pass

class ResponseModel(BaseModel):
output_documents: list[str]
final_prompts: list[list[str]] | None
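
BulkEval's local LLMSettingsMixin is deleted in favor of the shared LanguageModelSettings, so response_format_type becomes part of this recipe's request schema automatically. A reduced sketch of the composition pattern — fields abbreviated, and BasePage.RequestModel replaced by a plain BaseModel for self-containment:

```python
# Reduced reconstruction of the pattern above, not the full field set.
from pydantic import BaseModel

class LanguageModelSettings(BaseModel):
    avoid_repetition: bool | None
    num_outputs: int | None
    quality: float | None
    max_tokens: int | None
    sampling_temperature: float | None
    response_format_type: str | None  # None or "json_object"

class RequestModelBase(BaseModel):
    documents: list[str]
    selected_model: str | None

class RequestModel(LanguageModelSettings, RequestModelBase):
    pass  # merges both field sets into one request schema

print(sorted(RequestModel.__fields__))  # includes "response_format_type"
```

Splitting the page-specific fields into RequestModelBase keeps the LLM settings reusable: any recipe that adopts the mixin picks up the JSON-format option without redeclaring it.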