Skip to content

Commit

Permalink
feat:Add bot_id search parameter for knowledge search interface. (#189)
Browse files Browse the repository at this point in the history
  • Loading branch information
xingwanying authored Aug 7, 2024
2 parents ed1e193 + 2fe9f4e commit c5c9200
Show file tree
Hide file tree
Showing 17 changed files with 336 additions and 171 deletions.
15 changes: 15 additions & 0 deletions client/app/factory/edit/[id]/page.tsx
Original file line number Diff line number Diff line change
Expand Up @@ -18,6 +18,7 @@ import AIBtnIcon from '@/public/icons/AIBtnIcon';
import ChatIcon from '@/public/icons/ChatIcon';
import ConfigIcon from '@/public/icons/ConfigIcon';
import SaveIcon from '@/public/icons/SaveIcon';
import BookIcon from '@/public/icons/BookIcon';
import { useBot } from '@/app/contexts/BotContext';

import 'react-toastify/dist/ReactToastify.css';
Expand Down Expand Up @@ -247,6 +248,19 @@ export default function Edit({ params }: { params: { id: string } }) {
重新生成配置
</Button>
)}
{isEdit ? (
<Button
radius="full"
className="bg-[#F1F1F1] text-gray-500"
startContent={<BookIcon />}
isLoading={createBotLoading}
onClick={() => {
getBotInfoByRepoName(botProfile?.repoName!);
}}
>
知识库
</Button>
) : null}
</div>

{isEdit && <BotCreateFrom />}
Expand Down Expand Up @@ -377,6 +391,7 @@ export default function Edit({ params }: { params: { id: string } }) {
style={{
backgroundColor: '#FCFCFC',
}}
token={params.id}
apiDomain={API_HOST}
apiUrl="/api/chat/stream_qa"
prompt={botProfile?.prompt}
Expand Down
19 changes: 19 additions & 0 deletions client/public/icons/BookIcon.tsx
Original file line number Diff line number Diff line change
@@ -0,0 +1,19 @@
/**
 * BookIcon renders a 16x16 open-book glyph as an inline SVG.
 *
 * The icon consists of two mirrored page shapes, both filled with the
 * fixed gray `#6B7280`. It takes no props.
 */
function BookIcon() {
  return (
    <svg
      width="16"
      height="16"
      viewBox="0 0 16 16"
      fill="none"
      xmlns="http://www.w3.org/2000/svg"
    >
      {/* Left page */}
      <path
        d="M7.5 3.02191C6.4897 2.37519 5.28831 2 4 2C3.24118 2 2.51171 2.13024 1.83338 2.36999C1.63358 2.44061 1.5 2.6295 1.5 2.84141V12.3414C1.5 12.5037 1.57879 12.6559 1.71131 12.7496C1.84383 12.8434 2.01359 12.8669 2.16662 12.8128C2.73941 12.6104 3.35627 12.5 4 12.5C5.33001 12.5 6.54883 12.9715 7.5 13.7571V3.02191Z"
        fill="#6B7280"
      />
      {/* Right page */}
      <path
        d="M8.5 13.7571C9.45117 12.9715 10.67 12.5 12 12.5C12.6437 12.5 13.2606 12.6104 13.8334 12.8128C13.9864 12.8669 14.1562 12.8434 14.2887 12.7496C14.4212 12.6559 14.5 12.5037 14.5 12.3414V2.84141C14.5 2.6295 14.3664 2.44061 14.1666 2.36999C13.4883 2.13024 12.7588 2 12 2C10.7117 2 9.5103 2.37519 8.5 3.02191V13.7571Z"
        fill="#6B7280"
      />
    </svg>
  );
}

export default BookIcon;
2 changes: 1 addition & 1 deletion server/README.md
Original file line number Diff line number Diff line change
@@ -1,2 +1,2 @@

English | <img src="https://gw.alipayobjects.com/zos/antfincdn/R8sN%24GNdh6/language.svg" width="18"> [简体中文](./README.zh-CN.md.md)
English | <img src="https://gw.alipayobjects.com/zos/antfincdn/R8sN%24GNdh6/language.svg" width="18"> [简体中文](./README.zh-CN.md)
90 changes: 88 additions & 2 deletions server/README.zh-CN.md
Original file line number Diff line number Diff line change
@@ -1,9 +1,18 @@
<img src="https://gw.alipayobjects.com/zos/antfincdn/R8sN%24GNdh6/language.svg" width="18"> [English](./README.md) | 简体中文

# 介绍
PeterCat 服务端,采用 FastAPI 框架开发。
PeterCat 服务端,采用 FastAPI 框架开发。使用了 supabase 作为数据存储方案。

# 功能模块
## 存储
采用 [supabase](https://supabase.com) 作为数据库进行存储。
作为开发者,你需要熟悉该平台的以下功能:
- project 管理平台:https://supabase.com/dashboard/project/{projectId}, 请开发者联系管理员赋予相关权限。
- 进入 Project 管理平台后,左边菜单栏中的 Table Editor、SQL Editor、Database 是你的好帮手。
- Table Editor 支持直接修改数据;
- SQL Editor 是一个可以在线编写 SQL 并执行的可视化客户端;你可以在其中创建表、删除表、创建函数、删除函数等操作。
- Database 中提供了数据库的综合管理;

## github
### webhook
代码目录
Expand Down Expand Up @@ -36,4 +45,81 @@ Webhook URL \> 填入smee channel url, eg: https://smee.io/Q2VVS0casGnhZV

6. 在 demo repository 发起 issue 或者 pull-request 后,在 smee 和本地都能同步看到请求。

7. 在测试完毕后记得将 Webhook URL 改回去, eg:http://pertercat.chat/api/github/app/webhook
7. 在测试完毕后记得将 Webhook URL 改回去, eg:http://pertercat.chat/api/github/app/webhook

## RAG
### API
> server/routers/rag.py
#### rag/add_knowledge_by_doc
新增知识库。将 GitHub 上指定仓库中的文档进行 Embedding 化后,存储在 supabase 中,对应的 table 为 `rag_docs`。

#### rag/search_knowledge
搜索知识。将输入的 query 进行 Embedding 化后,与 supabase 中存储的知识进行匹配,返回匹配结果。

### 数据库
建议将 DB 相关操作备份在 /server/sql/rag_docs.sql 中,方便追踪。
#### 创建知识库
```sql
create extension
if not exists vector;

-- Create a table to store your rag_docs
create table rag_docs
(
id uuid primary key,
content text,
-- corresponds to Document.pageContent
metadata jsonb,
-- corresponds to Document.metadata
embedding vector (1536),
-- 1536 works for OpenAI embeddings, change if needed
-- per request info
repo_name varchar,
commit_id varchar,
bot_id varchar,
file_sha varchar,
file_path varchar
);
```
### 创建 Function
为了实现知识库的 Embedding 查询,需要创建一个 Function。
[supabase 文档教程](https://supabase.com/docs/guides/ai/vector-columns#querying-a-vector--embedding)

> 建议:
> 1. 如果 Function 的入参发生了变化,需要先删除该 Function 再重新创建。事实上,建议在项目上线后以创建新版本函数的方式进行变更,并保留历史函数。
> 2. 将函数备份在本项目中 server/sql/rag_docs.sql
#### 示例
这些 sql 可以在 SQL Editor 中执行。
```sql
-- 删除函数
drop function if exists match_rag_docs_v1;
-- 新建函数
create function match_rag_docs_v1
(
query_embedding vector (1536),
filter jsonb default '{}'
) returns table
(
id uuid,
content text,
metadata jsonb,
embedding vector,
similarity float
) language plpgsql as $$
#variable_conflict use_column
begin
return query
select
id,
content,
metadata,
embedding,
1 - (rag_docs.embedding <=> query_embedding
) as similarity
from rag_docs
where metadata @> jsonb_extract_path(filter, 'metadata')
and bot_id = jsonb_extract_path_text(filter, 'bot_id')
order by rag_docs.embedding <=> query_embedding;
end;
$$;
```
124 changes: 70 additions & 54 deletions server/agent/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -16,17 +16,19 @@
from utils.env import get_env_variable

OPEN_API_KEY = get_env_variable("OPENAI_API_KEY")
TAVILY_API_KEY = get_env_variable("TAVILY_API_KEY")
TAVILY_API_KEY = get_env_variable("TAVILY_API_KEY")


class AgentBuilder:

def __init__(
self,
prompt: str,
tools: Dict[str, Callable],
enable_tavily: Optional[bool] = True,
self,
prompt: str,
tools: Dict[str, Callable],
enable_tavily: Optional[bool] = True,
temperature: Optional[int] = 0.2,
max_tokens: Optional[int] = 1500
max_tokens: Optional[int] = 1500,
runtime_invoke_context: Optional[Dict] = {},
):
"""
@class `Build AgentExecutor based on tools and prompt`
Expand All @@ -44,25 +46,27 @@ def __init__(
self.agent_executor = self._create_agent_with_tools()

def init_tavily_tools(self):
# init Tavily
# init Tavily
search = TavilySearchAPIWrapper()
tavily_tool = TavilySearchResults(api_wrapper=search)
return [tavily_tool]

def _create_agent_with_tools(self) -> AgentExecutor:
llm = ChatOpenAI(model="gpt-4o", temperature=self.temperature, streaming=True, max_tokens=self.max_tokens, openai_api_key=OPEN_API_KEY)
llm = ChatOpenAI(
model="gpt-4o",
temperature=self.temperature,
streaming=True,
max_tokens=self.max_tokens,
openai_api_key=OPEN_API_KEY,
)

tools = self.init_tavily_tools() if self.enable_tavily else []

tools = self.init_tavily_tools() if self.enable_tavily else []

for tool in self.tools.values():
tools.append(tool)

if tools:
llm_with_tools = llm.bind(
tools=[convert_to_openai_tool(tool) for tool in tools]
)
else:
llm_with_tools = llm
llm = llm.bind_tools([convert_to_openai_tool(tool) for tool in tools])

self.prompt = self.get_prompt()
agent = (
Expand All @@ -74,12 +78,18 @@ def _create_agent_with_tools(self) -> AgentExecutor:
"chat_history": lambda x: x["chat_history"],
}
| self.prompt
| llm_with_tools
| llm
| OpenAIToolsAgentOutputParser()
)

return AgentExecutor(agent=agent, tools=tools, verbose=True, handle_parsing_errors=True, max_iterations=5)

return AgentExecutor(
agent=agent,
tools=tools,
verbose=True,
handle_parsing_errors=True,
max_iterations=5,
)

def get_prompt(self):
return ChatPromptTemplate.from_messages(
[
Expand All @@ -94,7 +104,7 @@ def get_prompt(self):
def chat_history_transform(messages: list[Message]):
transformed_messages = []
for message in messages:
print('message', message)
print("message", message)
if message.role == "user":
transformed_messages.append(HumanMessage(content=message.content))
elif message.role == "assistant":
Expand All @@ -118,55 +128,61 @@ async def run_stream_chat(self, input_data: ChatData) -> AsyncIterator[str]:
if kind == "on_llm_stream" or kind == "on_chat_model_stream":
content = event["data"]["chunk"].content
if content:
json_output = json.dumps({
"id": event["run_id"],
"type": "message",
"content": content,
}, ensure_ascii=False)
json_output = json.dumps(
{
"id": event["run_id"],
"type": "message",
"content": content,
},
ensure_ascii=False,
)
yield f"data: {json_output}\n\n"
elif kind == "on_tool_start":
children_value = event["data"].get("input", {})
json_output = json.dumps({
"id": event["run_id"],
"type": "tool",
"extra": {
"source": f"已调用工具: {event['name']}",
"pluginName": "GitHub",
"data": json.dumps(children_value, ensure_ascii=False),
"status": "loading"
}
}, ensure_ascii=False)

json_output = json.dumps(
{
"id": event["run_id"],
"type": "tool",
"extra": {
"source": f"已调用工具: {event['name']}",
"pluginName": "GitHub",
"data": json.dumps(children_value, ensure_ascii=False),
"status": "loading",
},
},
ensure_ascii=False,
)

yield f"data: {json_output}\n\n"
elif kind == "on_tool_end":
children_value = event["data"].get("output", {})
json_output = json.dumps({
"id": event["run_id"],
"type": "tool",
"extra": {
"source": f"已调用工具: {event['name']}",
"pluginName": "GitHub",
"data": children_value,
"status": "success"
json_output = json.dumps(
{
"id": event["run_id"],
"type": "tool",
"extra": {
"source": f"已调用工具: {event['name']}",
"pluginName": "GitHub",
"data": children_value,
"status": "success",
},
},
}, ensure_ascii=False)
ensure_ascii=False,
)
yield f"data: {json_output}\n\n"
except Exception as e:
res = {
"status": "error",
"message": str(e)
}
res = {"status": "error", "message": str(e)}
yield f"data: {json.dumps(res, ensure_ascii=False)}\n\n"

async def run_chat(self, input_data: ChatData) -> str:
try:
messages = input_data.messages
messages = input_data.messages
return self.agent_executor.invoke(
{
"input": messages[len(messages) - 1].content,
"chat_history": self.chat_history_transform(messages),
},
return_only_outputs=True,
)
return_only_outputs=True,
)
except Exception as e:
return f"error: {str(e)}\n"
1 change: 0 additions & 1 deletion server/agent/bot_builder.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,6 @@
from tools import bot_builder



TOOL_MAPPING = {
"create_bot": bot_builder.create_bot,
"edit_bot": bot_builder.edit_bot,
Expand Down
Loading

0 comments on commit c5c9200

Please sign in to comment.