feat: Updated the subquery generator prompt! #49

Merged
3 commits merged on Apr 18, 2024
1 change: 0 additions & 1 deletion bot/retrievers/retrieve_similar_nodes.py
@@ -24,7 +24,6 @@ def __init__(
         """Init params."""
         self._vector_store = vector_store
         self._embed_model = embed_model
-        print(f"type(embed_model): {type(embed_model)} | embed_model: {embed_model}")
         self._similarity_top_k = similarity_top_k

     def query_db(
6 changes: 5 additions & 1 deletion subquery.py
@@ -7,7 +7,10 @@
 from llama_index.llms.openai import OpenAI
 from llama_index.question_gen.guidance import GuidanceQuestionGenerator
 from tc_hivemind_backend.embeddings.cohere import CohereEmbedding
-from utils.query_engine import prepare_discord_engine_auto_filter
+from utils.query_engine import (
+    DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL,
+    prepare_discord_engine_auto_filter,
+)


 def query_multiple_source(
@@ -106,6 +109,7 @@ def query_multiple_source(
     question_gen = GuidanceQuestionGenerator.from_defaults(
         guidance_llm=OpenAIChat("gpt-4"),
         verbose=False,
+        prompt_template_str=DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL,
     )
     s_engine = SubQuestionQueryEngine.from_defaults(
         question_gen=question_gen,
1 change: 1 addition & 0 deletions utils/query_engine/__init__.py
@@ -1,2 +1,3 @@
 # flake8: noqa
 from .prepare_discord_query_engine import prepare_discord_engine_auto_filter
+from .subquery_gen_prompt import DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL
86 changes: 86 additions & 0 deletions utils/query_engine/subquery_gen_prompt.py
@@ -0,0 +1,86 @@
import json
from typing import Sequence

from llama_index.core.prompts.guidance_utils import convert_to_handlebars
from llama_index.core.question_gen.types import SubQuestion
from llama_index.core.tools.types import ToolMetadata


def build_tools_text(tools: Sequence[ToolMetadata]) -> str:
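    """Serialize tool names and descriptions into an indented JSON string."""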
    tools_dict = {}
    for tool in tools:
        tools_dict[tool.name] = tool.description
    return json.dumps(tools_dict, indent=4)


PREFIX = """\
Given a user question, and a list of tools, output a list of relevant sub-questions \
in json markdown that when composed can help answer the full user question. \
Define the sub-questions as search queries that can be used for vector similarity search:
"""


example_query_str = (
    "What was decided about the token allocation budget for the "
    "next airdrop and what did the community think of this?"
)
example_tools = [
    ToolMetadata(
        name="Discord",
        description="Contains messages and summaries of conversations from the Discord platform of the community",
    ),
    ToolMetadata(
        name="Discourse",
        description="Contains messages and summaries of discussions from the Discourse platform of the community",
    ),
]
example_tools_str = build_tools_text(example_tools)
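# Each expected sub-question is a keyword-style search query routed to the most relevant tool.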
example_output = [
    SubQuestion(
        sub_question="Decision token allocation budget airdrop", tool_name="Discourse"
    ),
    SubQuestion(
        sub_question="Opinion token allocation budget airdrop", tool_name="Discord"
    ),
]
example_output_str = json.dumps({"items": [x.dict() for x in example_output]}, indent=4)

EXAMPLES = f"""\
# Example 1
<Tools>
```json
{example_tools_str}
```

<User Question>
{example_query_str}


<Output>
```json
{example_output_str}
```

""".replace(
    "{", "{{"
).replace(
    "}", "}}"
)
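# The double-brace escaping above keeps the example JSON literal: convert_to_handlebars
# later maps "{{" back to "{" while turning single-braced placeholders such as
# {tools_str} and {query_str} into handlebars variables.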

SUFFIX = """\
# Example 2
<Tools>
```json
{tools_str}
```

<User Question>
{query_str}

<Output>
"""

DEFAULT_SUB_QUESTION_PROMPT_TMPL = PREFIX + EXAMPLES + SUFFIX
DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL = convert_to_handlebars(
    DEFAULT_SUB_QUESTION_PROMPT_TMPL
)
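Usage note, not part of the diff: a minimal sketch of how the exported handlebars template could be previewed outside the guidance program, assuming the repository layout shown above; the tool metadata and question below are made up for illustration.

```python
from llama_index.core.tools.types import ToolMetadata

from utils.query_engine import DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL
from utils.query_engine.subquery_gen_prompt import build_tools_text

# Hypothetical tool metadata, used only to fill the template for inspection.
tools = [
    ToolMetadata(
        name="Discord",
        description="Messages and conversation summaries from the community's Discord",
    ),
]

# convert_to_handlebars turned {tools_str} and {query_str} into {{tools_str}} and
# {{query_str}}, so substituting those placeholders by hand previews the prompt
# text that the guidance program would receive at query time.
preview = DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL.replace(
    "{{tools_str}}", build_tools_text(tools)
).replace(
    "{{query_str}}", "What did the community decide about the next airdrop?"
)
print(preview)
```

In the PR itself the template is consumed through GuidanceQuestionGenerator.from_defaults(prompt_template_str=...), as shown in the subquery.py hunk above.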