Add LangChain Support to RAG Agent #460

Open · wants to merge 9 commits into base: main
62 changes: 62 additions & 0 deletions examples/conversation_with_RAG_agents/configs/lc_agent_config.json
@@ -0,0 +1,62 @@
[
{
"class": "LangChainAgent",
"args": {
"name": "Tutorial-Assistant",
"description": "Tutorial-Assistant is an agent that can provide answer based on English tutorial material, mainly the markdown files. It can answer general questions about AgentScope.",
"sys_prompt": "You're an assistant helping new users to use AgentScope. The language style is helpful and cheerful. You generate answers based on the provided context. The answer is expected to be no longer than 100 words. If the key words of the question can be found in the provided context, the answer should contain the section name which contains the answer. For example, 'You may refer to SECTION_NAME for more details.'",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_tutorial_rag"],
"similarity_top_k": 5,
"log_retrieval": false,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "LangChainAgent",
"args": {
"name": "Code-Search-Assistant",
"description": "Code-Search-Assistant is an agent that can provide answer based on AgentScope code base. It can answer questions about specific modules in AgentScope.",
"sys_prompt": "You're a coding assistant of AgentScope. The answer starts with appreciation for the question, then provide details regarding the functionality and features of the modules mentioned in the question. The language should be in a professional and simple style. The answer is limited to be less than 100 words.",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_code_rag"],
"search_type": "similarity",
"log_retrieval": true,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "LangChainAgent",
"args": {
"name": "API-Assistant",
"description": "API-Assistant is an agent that can answer questions about APIs in AgentScope. It can answer general questions about AgentScope.",
"sys_prompt": "You're an assistant providing answers to the questions related to APIs (functions and classes) in AgentScope. The language style is helpful and cheerful. You generate answers based on the provided context. The answer is expected to be no longer than 200 words. If the key words of the question can be found in the provided context, the answer should contain the module of the API. For example, 'You may refer to MODULE_NAME for more details.'",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_api_rag"],
"search_kwargs": {"k": 2},
"log_retrieval": false,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "LangChainAgent",
"args": {
"name": "Searching-Assistant",
"description": "Search-Assistant is an agent that can provide answer based on AgentScope code and tutorial. It can answer questions about everything in AgentScope codes and tutorials.",
"sys_prompt": "You're a helpful assistant of AgentScope. The answer starts with appreciation for the question, then provide output the location of the code or section that the most relevant to the question. The answer is limited to be less than 50 words.",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_code_rag","agentscope_tutorial_rag"],
"log_retrieval": false,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "DialogAgent",
"args": {
"name": "Agent-Guiding-Assistant",
"sys_prompt": "You're an assistant guiding the user to specific agent for help. The answer is in a cheerful styled language. The output starts with appreciation for the question. Next, rephrase the question in a simple declarative Sentence for example, 'I think you are asking...'. Last, if the question is about detailed code or example in AgentScope Framework, output '@ Code-Search-Assistant you might be suitable for answering the question'; if the question is about API or function calls (Example: 'Is there function related...' or 'how can I initialize ...' ) in AgentScope, output '@ API-Assistant, I think you are more suitable for the question, please tell us more about it'; if question is about where to find some context (Example:'where can I find...'), output '@ Searching-Assistant, we need your help', otherwise, output '@ Tutorial-Assistant, I think you are more suitable for the question, can you tell us more about it?'. The answer is expected to be only one sentence",
"model_config_name": "qwen_config",
"use_memory": false
}
}
]
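
For context, here is a minimal sketch of how entries like these could be turned into agent instances. This is an illustration only: the model config path is hypothetical, and the model configs referenced by "model_config_name" (qwen_config, qwen_emb_config) must be registered via agentscope.init before the agents are constructed.

import json

import agentscope
from agentscope.agents import DialogAgent, LangChainAgent

# Hypothetical path; the file must register "qwen_config"
# (and "qwen_emb_config" for the knowledge pipeline).
agentscope.init(model_configs="./configs/model_config.json")

with open("./configs/lc_agent_config.json", "r", encoding="utf-8") as f:
    agent_configs = json.load(f)

# Map the "class" field of each entry to its agent class and pass
# the "args" dict straight to the constructor.
agent_classes = {"LangChainAgent": LangChainAgent, "DialogAgent": DialogAgent}
agents = [agent_classes[cfg["class"]](**cfg["args"]) for cfg in agent_configs]

The LangChain agents built this way still need their knowledge attached via a knowledge bank, as shown in rag_example.py further down.
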
@@ -0,0 +1,96 @@
[
{
"knowledge_id": "agentscope_code_rag",
"emb_model_config_name": "qwen_emb_config",
"chunk_size": 2048,
"chunk_overlap": 40,
"data_processing": [
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../src/agentscope",
"recursive": true,
"glob": ["**/*.py"]
}
}
},
"data_parse": {
"splitter": {
"create_object": true,
"module": "langchain_text_splitters.python",
"class": "PythonCodeTextSplitter",
"init_args": {}
}
}
}
]
},
{
"knowledge_id": "agentscope_api_rag",
"emb_model_config_name": "qwen_emb_config",
"chunk_size": 1024,
"chunk_overlap": 40,
"data_processing": [
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../docs/docstring_html/",
"glob": ["*.html"]
}
}
}
}
]
},
{
"knowledge_id": "agentscope_global_rag",
"emb_model_config_name": "qwen_emb_config",
"chunk_size": 2048,
"chunk_overlap": 40,
"data_processing": [
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../docs/sphinx_doc/en/source/tutorial",
"glob": ["*.md"]
}
}
}
},
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../src/agentscope",
"recursive": true,
"glob": ["**/*.py"]
}
}
},
"data_parse": {
"splitter": {
"create_object": true,
"module": "langchain_text_splitters.python",
"class": "PythonCodeTextSplitter",
"init_args": {}
}
}
}
]
}
]
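
Roughly speaking, the agentscope_code_rag entry above describes the following direct LangChain calls. This is a sketch under assumptions: loader_cls=TextLoader is added here to avoid DirectoryLoader's unstructured-based default, and how chunk_size/chunk_overlap are forwarded to the splitter depends on the LangChain knowledge backend introduced by this PR.

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import PythonCodeTextSplitter

# Load every Python file under the AgentScope source tree.
loader = DirectoryLoader(
    "../../src/agentscope",
    glob="**/*.py",
    recursive=True,
    loader_cls=TextLoader,
)
documents = loader.load()

# Split the code into chunks; the resulting Documents expose
# .page_content and .metadata, which LangChainAgent.reply() reads.
splitter = PythonCodeTextSplitter(chunk_size=2048, chunk_overlap=40)
chunks = splitter.split_documents(documents)
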
12 changes: 11 additions & 1 deletion examples/conversation_with_RAG_agents/rag_example.py
@@ -78,6 +78,16 @@ def main() -> None:
},
)

# # if using LangChain knowledge, we need to set backend_engine
# knowledge_bank.add_data_as_knowledge(
# knowledge_id="agentscope_tutorial_rag",
# emb_model_name="qwen_emb_config",
# data_dirs_and_types={
# "../../docs/sphinx_doc/en/source/tutorial": ["*.md"],
# },
# backend_engine="langchain"
# )

# let the knowledge bank equip the rag agent with a (set of) knowledge
# corresponding to its knowledge_id_list
for agent in rag_agent_list:
@@ -104,7 +114,7 @@ def main() -> None:
rag_agent_descriptions = [
"agent name: "
+ agent.name
+ "\n agent description"
+ "\n agent description: "
+ agent.description
+ "\n"
for agent in rag_agent_list
1 change: 1 addition & 0 deletions setup.py
@@ -86,6 +86,7 @@

extra_rag_requires = [
"llama-index==0.10.30",
"langchain==0.3.1",
]

# API requires
6 changes: 5 additions & 1 deletion src/agentscope/agents/__init__.py
@@ -6,7 +6,10 @@
from .dict_dialog_agent import DictDialogAgent
from .user_agent import UserAgent
from .react_agent import ReActAgent
from .rag_agent import LlamaIndexAgent
from .rag_agent import (
LlamaIndexAgent,
LangChainAgent,
)


__all__ = [
@@ -17,4 +20,5 @@
"UserAgent",
"ReActAgent",
"LlamaIndexAgent",
"LangChainAgent",
]
163 changes: 162 additions & 1 deletion src/agentscope/agents/rag_agent.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
This example shows how to build an agent with RAG
with LlamaIndex.
with LlamaIndex and LangChain.

Notice, this is a Beta version of RAG agent.
"""
@@ -12,6 +12,7 @@
from agentscope.agents.agent import AgentBase
from agentscope.message import Msg
from agentscope.rag import Knowledge
from agentscope.utils.common import _convert_to_str

CHECKING_PROMPT = """
Is the retrieved content relevant to the query?
@@ -192,3 +193,163 @@ def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg:
self.memory.add(msg)

return msg


class LangChainAgent(AgentBase):
"""
A RAG agent built on LangChain.
"""

def __init__(
self,
name: str,
sys_prompt: str,
model_config_name: str,
knowledge_list: list[Knowledge] = None,
knowledge_id_list: list[str] = None,
similarity_top_k: int = None,
search_type: str = "similarity",
search_kwargs: dict = None,
log_retrieval: bool = True,
recent_n_mem_for_retrieve: int = 1,
**kwargs: Any,
) -> None:
"""
Initialize the RAG LangChainAgent
Args:
name (str):
the name for the agent
sys_prompt (str):
system prompt for the RAG agent
model_config_name (str):
language model for the agent
knowledge_list (list[Knowledge]):
a list of knowledge objects.
Users can choose to pass a list of knowledge objects
directly when initializing the RAG agent. Another
choice is to pass a list of knowledge ids and
obtain the knowledge with the `equip` function of a
knowledge bank.
knowledge_id_list (list[str]):
a list of ids of the knowledge.
This is designed for easily setting up multiple RAG
agents with a config file. To obtain the knowledge
objects, users can pass this agent to the `equip`
function of a knowledge bank to add the corresponding
knowledge to the agent's self.knowledge_list.
similarity_top_k (int):
the number of most similar data blocks retrieved
from each of the knowledge
search_type (str):
the type of search to be performed on the
LangChain knowledge
search_kwargs (dict):
additional keyword arguments for the
search operation on the LangChain knowledge
log_retrieval (bool):
whether to print the retrieved content
recent_n_mem_for_retrieve (int):
the number of pieces of memory used as part of
retrieval query
"""
super().__init__(
name=name,
sys_prompt=sys_prompt,
model_config_name=model_config_name,
)
self.knowledge_list = knowledge_list or []
self.knowledge_id_list = knowledge_id_list or []
self.similarity_top_k = similarity_top_k
self.search_type = search_type
self.search_kwargs = search_kwargs or {}
self.log_retrieval = log_retrieval
self.recent_n_mem_for_retrieve = recent_n_mem_for_retrieve
self.description = kwargs.get("description", "")

def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg:
"""
Reply function of the LangChain agent.
Processes the input data:
1) uses the input data to retrieve with the RAG function;
2) generates a prompt using the current memory and system
prompt;
3) invokes the language model to produce a response. The
response is then formatted and added to the dialogue memory.

Args:
x (`Optional[Union[Msg, Sequence[Msg]]]`, defaults to `None`):
The input message(s) to the agent, which also can be omitted if
the agent doesn't need any input.

Returns:
`Msg`: The output message generated by the agent.
"""
retrieved_docs_to_string = ""
# record the input if needed
if self.memory:
self.memory.add(x)
# in case no input is provided (e.g., in msghub),
# use the memory as query
history = self.memory.get_memory(
recent_n=self.recent_n_mem_for_retrieve,
)
query = (
"/n".join(
[msg.content for msg in history],
)
if isinstance(history, list)
else str(history)
)
elif x is not None:
query = x.content
else:
query = ""

if len(query) > 0:
# when content has information, do retrieval
for knowledge in self.knowledge_list:
retrieved_nodes = knowledge.retrieve(
str(query),
self.similarity_top_k,
search_type=self.search_type,
search_kwargs=self.search_kwargs,
)

for document in retrieved_nodes:
retrieved_docs_to_string += (
"\n>>>> source:"
+ _convert_to_str(document.metadata)
+ "\n>>>> content:"
+ document.page_content
)

if self.log_retrieval:
self.speak("[retrieved]:" + retrieved_docs_to_string)

# prepare prompt
prompt = self.model.format(
Msg(
name="system",
role="system",
content=self.sys_prompt,
),
# {"role": "system", "content": retrieved_docs_to_string},
self.memory.get_memory(
recent_n=self.recent_n_mem_for_retrieve,
),
Msg(
name="user",
role="user",
content="Context: " + retrieved_docs_to_string,
),
)

# call llm and generate response
response = self.model(prompt).text
msg = Msg(self.name, response, "assistant")

# Print/speak the message in this agent's voice
self.speak(msg)

if self.memory:
# Record the message in memory
self.memory.add(msg)

return msg
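
For orientation, here is a minimal end-to-end sketch of how LangChainAgent is meant to be wired to a LangChain-backed knowledge. This is hypothetical: the config paths are placeholders, and the KnowledgeBank constructor and equip signatures are assumed from the example script above.

import agentscope
from agentscope.agents import LangChainAgent
from agentscope.message import Msg
from agentscope.rag import KnowledgeBank  # import path assumed

agentscope.init(model_configs="./configs/model_config.json")  # placeholder path

# Mirror the commented-out block in rag_example.py: register a
# LangChain-backed knowledge in the bank.
knowledge_bank = KnowledgeBank(configs="./configs/lc_knowledge_config.json")  # file name assumed
knowledge_bank.add_data_as_knowledge(
    knowledge_id="agentscope_tutorial_rag",
    emb_model_name="qwen_emb_config",
    data_dirs_and_types={
        "../../docs/sphinx_doc/en/source/tutorial": ["*.md"],
    },
    backend_engine="langchain",
)

agent = LangChainAgent(
    name="Tutorial-Assistant",
    sys_prompt="You're an assistant helping new users to use AgentScope.",
    model_config_name="qwen_config",
    knowledge_id_list=["agentscope_tutorial_rag"],
    similarity_top_k=5,
)
knowledge_bank.equip(agent, agent.knowledge_id_list)  # signature assumed

reply = agent(Msg(name="user", content="How do I create an agent?", role="user"))
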