Skip to content

Commit

Permalink
feat: Added website data source!
Browse files Browse the repository at this point in the history
  • Loading branch information
amindadgar committed Nov 27, 2024
1 parent b444891 commit f9dff40
Show file tree
Hide file tree
Showing 4 changed files with 43 additions and 0 deletions.
18 changes: 18 additions & 0 deletions subquery.py
Original file line number Diff line number Diff line change
Expand Up @@ -15,6 +15,7 @@
NotionQueryEngine,
TelegramDualQueryEngine,
TelegramQueryEngine,
WebsiteQueryEngine,
prepare_discord_engine_auto_filter,
)

Expand All @@ -29,6 +30,7 @@ def query_multiple_source(
telegram: bool = False,
github: bool = False,
mediaWiki: bool = False,
website: bool = False,
) -> tuple[str, list[NodeWithScore]]:
"""
query multiple platforms and get an answer from the multiple
Expand Down Expand Up @@ -180,6 +182,22 @@ def query_multiple_source(
)
)

if website and check_collection("website"):
website_query_engine = WebsiteQueryEngine(community_id=community_id).prepare()
tool_metadata = ToolMetadata(
name="Website",
description=(
"Hosts a diverse collection of crawled data from various "
"online sources to facilitate community insights and analysis."
),
)
query_engine_tools.append(
QueryEngineTool(
query_engine=website_query_engine,
metadata=tool_metadata,
)
)

embed_model = CohereEmbedding()
llm = OpenAI("gpt-3.5-turbo")
Settings.embed_model = embed_model
Expand Down
17 changes: 17 additions & 0 deletions tests/unit/test_website_query_engine.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,17 @@
from unittest import TestCase

from llama_index.core.indices.vector_store.retrievers.retriever import (
VectorIndexRetriever,
)
from utils.query_engine import WebsiteQueryEngine


class TestNotionQueryEngine(TestCase):
    """Unit tests for ``WebsiteQueryEngine``.

    NOTE(review): the class name still says "Notion" although the engine under
    test is ``WebsiteQueryEngine`` -- it looks like a copy-paste leftover.
    The name is kept unchanged so existing test selectors keep working;
    consider renaming it to ``TestWebsiteQueryEngine``.
    """

    def setUp(self) -> None:
        """Create a fresh ``WebsiteQueryEngine`` for a sample community."""
        community_id = "sample_community"
        self.website_query_engine = WebsiteQueryEngine(community_id)

    def test_prepare_engine(self) -> None:
        """``prepare(testing=True)`` returns an engine backed by a vector retriever."""
        query_engine = self.website_query_engine.prepare(testing=True)
        # The assertion below is the only check; no debug output is printed.
        self.assertIsInstance(query_engine.retriever, VectorIndexRetriever)
1 change: 1 addition & 0 deletions utils/query_engine/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,3 +7,4 @@
from .prepare_discord_query_engine import prepare_discord_engine_auto_filter
from .subquery_gen_prompt import DEFAULT_GUIDANCE_SUB_QUESTION_PROMPT_TMPL
from .telegram import TelegramDualQueryEngine, TelegramQueryEngine
from .website import WebsiteQueryEngine
7 changes: 7 additions & 0 deletions utils/query_engine/website.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from utils.query_engine.base_qdrant_engine import BaseQdrantEngine


class WebsiteQueryEngine(BaseQdrantEngine):
    """Qdrant-backed query engine bound to the ``"website"`` platform."""

    def __init__(self, community_id: str) -> None:
        """Initialise the base engine for the given community.

        Parameters
        ----------
        community_id : str
            Identifier of the community, forwarded to ``BaseQdrantEngine``
            together with the fixed platform name ``"website"``.
        """
        super().__init__("website", community_id)

0 comments on commit f9dff40

Please sign in to comment.