Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

fix bm25 & keyword search #564

Merged
merged 3 commits into from
Jan 10, 2025
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
4 changes: 3 additions & 1 deletion daras_ai_v2/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -1795,7 +1795,9 @@ def render_variables(self):
if not self.functions_in_settings:
functions_input(self.request.user)
variables_input(
template_keys=self.template_keys, allow_add=is_functions_enabled()
template_keys=self.template_keys,
allow_add=is_functions_enabled(),
exclude=self.fields_to_save(),
)

@classmethod
Expand Down
11 changes: 6 additions & 5 deletions daras_ai_v2/query_generator.py
Original file line number Diff line number Diff line change
Expand Up @@ -18,11 +18,12 @@ def generate_final_search_query(
context: dict = None,
response_format_type: typing.Literal["text", "json_object"] = None,
):
if context is None:
context = request.dict()
if response:
context |= response.dict()
instructions = render_prompt_vars(instructions, context).strip()
state = request.dict()
if response:
state |= response.dict()
if context:
state |= context
instructions = render_prompt_vars(instructions, state).strip()
if not instructions:
return ""
return run_language_model(
Expand Down
3 changes: 2 additions & 1 deletion daras_ai_v2/variables_widget.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,6 +23,7 @@ def variables_input(
description: str = "Variables let you pass custom parameters to your workflow. Access a variable in your instruction prompt with <a href='https://jinja.palletsprojects.com/en/3.1.x/templates/' target='_blank'>Jinja</a>, e.g. `{{ my_variable }}`\n ",
key: str = "variables",
allow_add: bool = False,
exclude: typing.Iterable[str] = (),
):
from recipes.BulkRunner import list_view_editor

Expand All @@ -45,7 +46,7 @@ def variables_input(
var_names = (
(template_var_names | set(variables.keys()))
- set(context_globals().keys()) # dont show global context variables
- set(gui.session_state.keys()) # dont show other session state variables
- set(exclude) # used for hiding request/response fields
)
pressed_add = False
if var_names or allow_add:
Expand Down
101 changes: 68 additions & 33 deletions daras_ai_v2/vector_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,7 @@
import re
import tempfile
import typing
import unicodedata
from functools import partial
from time import time

Expand Down Expand Up @@ -197,6 +198,7 @@ def get_top_k_references(
s = time()
search_result = query_vespa(
request.search_query,
request.keyword_query,
file_ids=vespa_file_ids,
limit=request.max_references or 100,
embedding_model=embedding_model,
Expand Down Expand Up @@ -245,34 +247,63 @@ def vespa_search_results_to_refs(

def query_vespa(
search_query: str,
keyword_query: str | list[str] | None,
file_ids: list[str],
limit: int,
embedding_model: EmbeddingModels,
semantic_weight: float = 1.0,
threshold: float = 0.7,
rerank_count: int = 1000,
) -> dict:
query_embedding = create_embeddings_cached([search_query], model=embedding_model)[0]
if query_embedding is None or not file_ids:
if not file_ids:
return {"root": {"children": []}}
file_ids_str = ", ".join(map(repr, file_ids))
query = f"select * from {settings.VESPA_SCHEMA} where file_id in (@fileIds) and (userQuery() or ({{targetHits: {limit}}}nearestNeighbor(embedding, q))) limit {limit}"
logger.debug(f"Vespa query: {query!r}")
if semantic_weight == 1.0:
ranking = "semantic"
elif semantic_weight == 0.0:

yql = "select * from %(schema)s where file_id in (@fileIds) and " % dict(
schema=settings.VESPA_SCHEMA
)
bm25_yql = "( {targetHits: %(hits)i} userInput(@bm25Query) )"
semantic_yql = "( {targetHits: %(hits)i, distanceThreshold: %(threshold)f} nearestNeighbor(embedding, queryEmbedding) )"

if semantic_weight == 0.0:
yql += bm25_yql % dict(hits=limit)
ranking = "bm25"
elif semantic_weight == 1.0:
yql += semantic_yql % dict(hits=limit, threshold=threshold)
ranking = "semantic"
else:
yql += (
"( "
+ bm25_yql % dict(hits=rerank_count)
+ " or "
+ semantic_yql % dict(hits=rerank_count, threshold=threshold)
+ " )"
)
ranking = "fusion"
response = get_vespa_app().query(
yql=query,
query=search_query,
ranking=ranking,
body={
"ranking.features.query(q)": padded_embedding(query_embedding),
"ranking.features.query(semanticWeight)": semantic_weight,
"fileIds": file_ids_str,
},

body = {"yql": yql, "ranking": ranking, "hits": limit}

if ranking in ("bm25", "fusion"):
if isinstance(keyword_query, list):
keyword_query = " ".join(keyword_query)
body["bm25Query"] = remove_control_characters(keyword_query or search_query)

logger.debug(
"vespa query " + " ".join(repr(f"{k}={v}") for k, v in body.items()) + " ..."
)

if ranking in ("semantic", "fusion"):
query_embedding = create_embeddings_cached(
[search_query], model=embedding_model
)[0]
if query_embedding is None:
return {"root": {"children": []}}
body["input.query(queryEmbedding)"] = padded_embedding(query_embedding)

body["fileIds"] = ", ".join(map(repr, file_ids))

response = get_vespa_app().query(body)
assert response.is_successful()

return response.get_json()


Expand Down Expand Up @@ -601,6 +632,23 @@ def _sha256(x) -> str:
return hashlib.sha256(str(x).encode()).hexdigest()


def format_embedding_row(
    doc_id: str,
    file_id: str,
    ref: SearchReference,
    embedding: np.ndarray,
    created_at: datetime.datetime,
):
    """Build the Vespa feed row for one embedded document chunk.

    Returns a plain dict matching the Vespa schema: the embedding is padded
    to the schema's fixed size, the timestamp is epoch milliseconds, and the
    title/snippet text is sanitized of unicode control characters (Vespa
    rejects them in document fields).
    """
    # Vespa expects epoch milliseconds for the created_at long field.
    timestamp_ms = int(created_at.timestamp() * 1000)
    return {
        "id": doc_id,
        "file_id": file_id,
        "embedding": padded_embedding(embedding),
        "created_at": timestamp_ms,
        "title": remove_control_characters(ref["title"]),
        "snippet": remove_control_characters(ref["snippet"]),
    }


def get_embeds_for_doc(
*,
f_url: str,
Expand Down Expand Up @@ -1063,22 +1111,9 @@ def render_sources_widget(refs: list[SearchReference]):
)


def format_embedding_row(
doc_id: str,
file_id: str,
ref: SearchReference,
embedding: np.ndarray,
created_at: datetime.datetime,
):
return dict(
id=doc_id,
file_id=file_id,
embedding=padded_embedding(embedding),
created_at=int(created_at.timestamp() * 1000),
# url=ref["url"].encode("unicode-escape").decode(),
# title=ref["title"].encode("unicode-escape").decode(),
# snippet=ref["snippet"].encode("unicode-escape").decode(),
)
def remove_control_characters(s):
    """Strip all unicode control characters (category "C*") from *s*.

    Vespa rejects documents containing control characters; see
    https://docs.vespa.ai/en/troubleshooting-encoding.html
    """
    kept = []
    for ch in s:
        # unicodedata.category returns e.g. "Cc" for control chars;
        # drop anything whose major category is "C".
        if not unicodedata.category(ch).startswith("C"):
            kept.append(ch)
    return "".join(kept)
devxpy marked this conversation as resolved.
Show resolved Hide resolved


EMBEDDING_SIZE = 3072
Expand Down
2 changes: 1 addition & 1 deletion pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -78,7 +78,7 @@ loguru = "^0.7.2"
aifail = "^0.3.0"
pytest-playwright = "^0.4.3"
emoji = "^2.10.1"
pyvespa = "^0.39.0"
pyvespa = "^0.51.0"
anthropic = "^0.34.1"
azure-cognitiveservices-speech = "^1.37.0"
twilio = "^9.2.3"
Expand Down
1 change: 1 addition & 0 deletions recipes/Functions.py
Original file line number Diff line number Diff line change
Expand Up @@ -136,6 +136,7 @@ def render_variables(self):
allow_add=True,
description="Pass custom parameters to your function and access the parent workflow data. "
"Variables will be passed down as the first argument to your anonymous JS function.",
exclude=self.fields_to_save(),
)

options = set(gui.session_state.get("secrets") or [])
Expand Down
9 changes: 6 additions & 3 deletions recipes/VideoBots.py
Original file line number Diff line number Diff line change
Expand Up @@ -979,8 +979,9 @@ def search_step(self, request, response, user_input, model):
yield "Creating search query..."
response.final_search_query = generate_final_search_query(
request=request,
response=response,
instructions=query_instructions,
context={**gui.session_state, "messages": chat_history},
context={"messages": chat_history},
)
else:
query_msgs.reverse()
Expand All @@ -998,8 +999,9 @@ def search_step(self, request, response, user_input, model):
keyword_query = json.loads(
generate_final_search_query(
request=k_request,
response=response,
instructions=keyword_instructions,
context={**gui.session_state, "messages": chat_history},
context={"messages": chat_history},
response_format_type="json_object",
),
)
Expand All @@ -1011,7 +1013,8 @@ def search_step(self, request, response, user_input, model):
response.references = yield from get_top_k_references(
DocSearchRequest.parse_obj(
{
**gui.session_state,
**request.dict(),
**response.dict(),
"search_query": response.final_search_query,
"keyword_query": response.final_keyword_query,
},
Expand Down
92 changes: 26 additions & 66 deletions scripts/setup_vespa_db.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,12 +6,11 @@
Schema,
Document,
Field,
FieldSet,
HNSW,
RankProfile,
FieldSet,
Function,
GlobalPhaseRanking,
QueryTypeField,
)

from daras_ai_v2 import settings
Expand All @@ -35,110 +34,71 @@
rank="filter",
),
Field(
name="url",
name="file_id",
type="string",
indexing=["attribute", "summary"],
),
Field(
name="title",
type="string",
indexing=["index", "summary"],
index="enable-bm25",
),
Field(
name="snippet",
type="string",
indexing=["index", "summary"],
index="enable-bm25",
attribute=["fast-search"],
rank="filter",
),
Field(
name="embedding",
type=EMBEDDING_TYPE,
indexing=["index", "attribute"],
ann=HNSW(distance_metric="dotproduct"),
),
Field(
name="file_id",
type="string",
indexing=["attribute", "summary"],
attribute=["fast-search"],
rank="filter",
),
Field(
name="created_at",
type="long",
indexing=["attribute"],
attribute=["fast-access"],
),
Field(
name="title",
type="string",
indexing=["index", "summary"],
index="enable-bm25",
),
Field(
name="snippet",
type="string",
indexing=["index", "summary"],
index="enable-bm25",
),
]
),
fieldsets=[FieldSet(name="default", fields=["title", "snippet"])],
rank_profiles=[
RankProfile(
name="bm25",
inputs=[
("query(q)", EMBEDDING_TYPE),
],
functions=[
Function(
name="bm25sum", expression="bm25(title) + bm25(snippet)"
)
],
first_phase="bm25sum",
first_phase="bm25(title) + bm25(snippet)",
),
RankProfile(
name="semantic",
inputs=[
("query(q)", EMBEDDING_TYPE),
],
inputs=[("query(queryEmbedding)", EMBEDDING_TYPE)],
first_phase="closeness(field, embedding)",
devxpy marked this conversation as resolved.
Show resolved Hide resolved
),
RankProfile(
name="fusion",
inherits="bm25",
inputs=[
("query(q)", EMBEDDING_TYPE),
("query(queryEmbedding)", EMBEDDING_TYPE),
("query(semanticWeight)", "double"),
],
first_phase="closeness(field, embedding)",
global_phase=GlobalPhaseRanking(
expression="""
if (closeness(field, embedding)>0.6,
reciprocal_rank(bm25sum) * (1 - query(semanticWeight)) +
reciprocal_rank(closeness(field, embedding)) * query(semanticWeight),
0)
""",
rerank_count=1000,
),
),
RankProfile(
name="fusion2", # with bm25 first
inherits="bm25",
inputs=[
("query(q)", EMBEDDING_TYPE),
("query(semanticWeight)", "double"),
functions=[
Function(
name="bm25sum",
expression="bm25(title) + bm25(snippet)",
),
],
first_phase="closeness(field, embedding)",
first_phase="bm25sum",
global_phase=GlobalPhaseRanking(
expression="""
if (bm25sum>0.6,
reciprocal_rank(bm25sum) * (1 - query(semanticWeight)) +
reciprocal_rank(closeness(field, embedding)) * query(semanticWeight),
0)
""",
expression="reciprocal_rank(bm25sum) * (1 - query(semanticWeight)) + reciprocal_rank(closeness(field, embedding)) * query(semanticWeight)",
rerank_count=1000,
),
),
],
)
],
)
package.query_profile_type.add_fields(
QueryTypeField(
name="ranking.features.query(q)",
type=EMBEDDING_TYPE,
),
)


def run():
Expand Down
Loading