Add LangChain Support to RAG Agent #460

Open · wants to merge 9 commits into base: main
62 changes: 62 additions & 0 deletions examples/conversation_with_RAG_agents/configs/lc_agent_config.json
@@ -0,0 +1,62 @@
[
{
"class": "LangChainAgent",
"args": {
"name": "Tutorial-Assistant",
"description": "Tutorial-Assistant is an agent that can provide answer based on English tutorial material, mainly the markdown files. It can answer general questions about AgentScope.",
"sys_prompt": "You're an assistant helping new users to use AgentScope. The language style is helpful and cheerful. You generate answers based on the provided context. The answer is expected to be no longer than 100 words. If the key words of the question can be found in the provided context, the answer should contain the section name which contains the answer. For example, 'You may refer to SECTION_NAME for more details.'",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_tutorial_rag"],
"similarity_top_k": 5,
"log_retrieval": false,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "LangChainAgent",
"args": {
"name": "Code-Search-Assistant",
"description": "Code-Search-Assistant is an agent that can provide answer based on AgentScope code base. It can answer questions about specific modules in AgentScope.",
"sys_prompt": "You're a coding assistant of AgentScope. The answer starts with appreciation for the question, then provide details regarding the functionality and features of the modules mentioned in the question. The language should be in a professional and simple style. The answer is limited to be less than 100 words.",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_code_rag"],
"search_type": "similarity",
"log_retrieval": true,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "LangChainAgent",
"args": {
"name": "API-Assistant",
"description": "API-Assistant is an agent that can answer questions about APIs in AgentScope. It can answer general questions about AgentScope.",
"sys_prompt": "You're an assistant providing answers to the questions related to APIs (functions and classes) in AgentScope. The language style is helpful and cheerful. You generate answers based on the provided context. The answer is expected to be no longer than 200 words. If the key words of the question can be found in the provided context, the answer should contain the module of the API. For example, 'You may refer to MODULE_NAME for more details.'",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_api_rag"],
"search_kwargs": {"k": 2},
"log_retrieval": false,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "LangChainAgent",
"args": {
"name": "Searching-Assistant",
"description": "Search-Assistant is an agent that can provide answer based on AgentScope code and tutorial. It can answer questions about everything in AgentScope codes and tutorials.",
"sys_prompt": "You're a helpful assistant of AgentScope. The answer starts with appreciation for the question, then provide output the location of the code or section that the most relevant to the question. The answer is limited to be less than 50 words.",
"model_config_name": "qwen_config",
"knowledge_id_list": ["agentscope_code_rag","agentscope_tutorial_rag"],
"log_retrieval": false,
"recent_n_mem_for_retrieve": 1
}
},
{
"class": "DialogAgent",
"args": {
"name": "Agent-Guiding-Assistant",
"sys_prompt": "You're an assistant guiding the user to specific agent for help. The answer is in a cheerful styled language. The output starts with appreciation for the question. Next, rephrase the question in a simple declarative Sentence for example, 'I think you are asking...'. Last, if the question is about detailed code or example in AgentScope Framework, output '@ Code-Search-Assistant you might be suitable for answering the question'; if the question is about API or function calls (Example: 'Is there function related...' or 'how can I initialize ...' ) in AgentScope, output '@ API-Assistant, I think you are more suitable for the question, please tell us more about it'; if question is about where to find some context (Example:'where can I find...'), output '@ Searching-Assistant, we need your help', otherwise, output '@ Tutorial-Assistant, I think you are more suitable for the question, can you tell us more about it?'. The answer is expected to be only one sentence",
"model_config_name": "qwen_config",
"use_memory": false
}
}
]
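
For context, here is a minimal sketch of how entries like these could be turned into agent instances. This is an illustration only: the model config path is hypothetical, and the model configs referenced by "model_config_name" (qwen_config, qwen_emb_config) must be registered via agentscope.init before the agents are constructed.

import json

import agentscope
from agentscope.agents import DialogAgent, LangChainAgent

# Hypothetical path; the file must register "qwen_config"
# (and "qwen_emb_config" for the knowledge pipeline).
agentscope.init(model_configs="./configs/model_config.json")

with open("./configs/lc_agent_config.json", "r", encoding="utf-8") as f:
    agent_configs = json.load(f)

# Map the "class" field of each entry to its agent class and pass
# the "args" dict straight to the constructor.
agent_classes = {"LangChainAgent": LangChainAgent, "DialogAgent": DialogAgent}
agents = [agent_classes[cfg["class"]](**cfg["args"]) for cfg in agent_configs]

The LangChain agents built this way still need their knowledge attached via a knowledge bank, as shown in rag_example.py further down.
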
@@ -0,0 +1,96 @@
[
{
"knowledge_id": "agentscope_code_rag",
"emb_model_config_name": "qwen_emb_config",
"chunk_size": 2048,
"chunk_overlap": 40,
"data_processing": [
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../src/agentscope",
"recursive": true,
"glob": ["**/*.py"]
}
}
},
"data_parse": {
"splitter": {
"create_object": true,
"module": "langchain_text_splitters.python",
"class": "PythonCodeTextSplitter",
"init_args": {}
}
}
}
]
},
{
"knowledge_id": "agentscope_api_rag",
"emb_model_config_name": "qwen_emb_config",
"chunk_size": 1024,
"chunk_overlap": 40,
"data_processing": [
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../docs/docstring_html/",
"glob": ["*.html"]
}
}
}
}
]
},
{
"knowledge_id": "agentscope_global_rag",
"emb_model_config_name": "qwen_emb_config",
"chunk_size": 2048,
"chunk_overlap": 40,
"data_processing": [
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../docs/sphinx_doc/en/source/tutorial",
"glob": ["*.md"]
}
}
}
},
{
"load_data": {
"loader": {
"create_object": true,
"module": "langchain_community.document_loaders",
"class": "DirectoryLoader",
"init_args": {
"path": "../../src/agentscope",
"recursive": true,
"glob": ["**/*.py"]
}
}
},
"data_parse": {
"splitter": {
"create_object": true,
"module": "langchain_text_splitters.python",
"class": "PythonCodeTextSplitter",
"init_args": {}
}
}
}
]
}
]
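
Roughly speaking, the agentscope_code_rag entry above describes the following direct LangChain calls. This is a sketch under assumptions: loader_cls=TextLoader is added here to avoid DirectoryLoader's unstructured-based default, and how chunk_size/chunk_overlap are forwarded to the splitter depends on the LangChain knowledge backend introduced by this PR.

from langchain_community.document_loaders import DirectoryLoader, TextLoader
from langchain_text_splitters import PythonCodeTextSplitter

# Load every Python file under the AgentScope source tree.
loader = DirectoryLoader(
    "../../src/agentscope",
    glob="**/*.py",
    recursive=True,
    loader_cls=TextLoader,
)
documents = loader.load()

# Split the code into chunks; the resulting Documents expose
# .page_content and .metadata, which LangChainAgent.reply() reads.
splitter = PythonCodeTextSplitter(chunk_size=2048, chunk_overlap=40)
chunks = splitter.split_documents(documents)
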
12 changes: 11 additions & 1 deletion examples/conversation_with_RAG_agents/rag_example.py
@@ -78,6 +78,16 @@ def main() -> None:
},
)

# # if using LangChain knowledge, we need to set backend_engine
# knowledge_bank.add_data_as_knowledge(
# knowledge_id="agentscope_tutorial_rag",
# emb_model_name="qwen_emb_config",
# data_dirs_and_types={
# "../../docs/sphinx_doc/en/source/tutorial": ["*.md"],
# },
# backend_engine="langchain"
# )

# let the knowledge bank equip the rag agent with a (set of) knowledge
# corresponding to its knowledge_id_list
for agent in rag_agent_list:
@@ -104,7 +114,7 @@ def main() -> None:
rag_agent_descriptions = [
"agent name: "
+ agent.name
+ "\n agent description"
+ "\n agent description: "
+ agent.description
+ "\n"
for agent in rag_agent_list
1 change: 1 addition & 0 deletions setup.py
@@ -86,6 +86,7 @@

extra_rag_requires = [
"llama-index==0.10.30",
"langchain==0.3.1",
]

# API requires
6 changes: 5 additions & 1 deletion src/agentscope/agents/__init__.py
@@ -6,7 +6,10 @@
from .dict_dialog_agent import DictDialogAgent
from .user_agent import UserAgent
from .react_agent import ReActAgent
from .rag_agent import LlamaIndexAgent
from .rag_agent import (
LlamaIndexAgent,
LangChainAgent,
)


__all__ = [
@@ -17,4 +20,5 @@
"UserAgent",
"ReActAgent",
"LlamaIndexAgent",
"LangChainAgent",
]
163 changes: 162 additions & 1 deletion src/agentscope/agents/rag_agent.py
@@ -1,7 +1,7 @@
# -*- coding: utf-8 -*-
"""
This example shows how to build an agent with RAG
with LlamaIndex.
with LlamaIndex and LangChain.

Notice, this is a Beta version of RAG agent.
"""
@@ -12,6 +12,7 @@
from agentscope.agents.agent import AgentBase
from agentscope.message import Msg
from agentscope.rag import Knowledge
from agentscope.utils.common import _convert_to_str

CHECKING_PROMPT = """
Is the retrieved content relevant to the query?
@@ -192,3 +193,163 @@ def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg:
self.memory.add(msg)

return msg


class LangChainAgent(AgentBase):
"""
A RAG agent built on LangChain.
"""

def __init__(
self,
name: str,
sys_prompt: str,
model_config_name: str,
knowledge_list: list[Knowledge] = None,
knowledge_id_list: list[str] = None,
similarity_top_k: int = None,
search_type: str = "similarity",
search_kwargs: dict = None,
log_retrieval: bool = True,
recent_n_mem_for_retrieve: int = 1,
**kwargs: Any,
) -> None:
"""
Initialize the RAG LangChainAgent
Args:
name (str):
the name for the agent
sys_prompt (str):
system prompt for the RAG agent
model_config_name (str):
language model for the agent
knowledge_list (list[Knowledge]):
a list of knowledge objects.
Users can choose to pass a list of knowledge objects
directly when initializing the RAG agent. Another
choice is to pass a list of knowledge ids and
obtain the knowledge with the `equip` function of a
knowledge bank.
knowledge_id_list (list[str]):
a list of ids of the knowledge.
This is designed for easily setting up multiple RAG
agents with a config file. To obtain the knowledge
objects, users can pass this agent to the `equip`
function of a knowledge bank to add the corresponding
knowledge to the agent's self.knowledge_list.
similarity_top_k (int):
the number of most similar data blocks retrieved
from each of the knowledge
search_type (str):
the type of search to be performed on the
LangChain knowledge
search_kwargs (dict):
additional keyword arguments for the
search operation on the LangChain knowledge
log_retrieval (bool):
whether to print the retrieved content
recent_n_mem_for_retrieve (int):
the number of pieces of memory used as part of
retrieval query
"""
super().__init__(
name=name,
sys_prompt=sys_prompt,
model_config_name=model_config_name,
)
self.knowledge_list = knowledge_list or []
self.knowledge_id_list = knowledge_id_list or []
self.similarity_top_k = similarity_top_k
self.search_type = search_type
self.search_kwargs = search_kwargs or {}
self.log_retrieval = log_retrieval
self.recent_n_mem_for_retrieve = recent_n_mem_for_retrieve
self.description = kwargs.get("description", "")

def reply(self, x: Optional[Union[Msg, Sequence[Msg]]] = None) -> Msg:
"""
Reply function of the LangChain agent.
Processes the input data:
1) uses the input data to retrieve with the RAG function;
2) generates a prompt using the current memory and system
prompt;
3) invokes the language model to produce a response. The
response is then formatted and added to the dialogue memory.

Args:
x (`Optional[Union[Msg, Sequence[Msg]]]`, defaults to `None`):
The input message(s) to the agent, which also can be omitted if
the agent doesn't need any input.

Returns:
`Msg`: The output message generated by the agent.
"""
retrieved_docs_to_string = ""
# record the input if needed
if self.memory:
self.memory.add(x)
# in case no input is provided (e.g., in msghub),
# use the memory as query
history = self.memory.get_memory(
recent_n=self.recent_n_mem_for_retrieve,
)
query = (
"/n".join(
[msg.content for msg in history],
)
if isinstance(history, list)
else str(history)
)
elif x is not None:
query = x.content
else:
query = ""

if len(query) > 0:
# when content has information, do retrieval
for knowledge in self.knowledge_list:
retrieved_nodes = knowledge.retrieve(
str(query),
self.similarity_top_k,
search_type=self.search_type,
search_kwargs=self.search_kwargs,
)

for document in retrieved_nodes:
retrieved_docs_to_string += (
"\n>>>> source:"
+ _convert_to_str(document.metadata)
+ "\n>>>> content:"
+ document.page_content
)

if self.log_retrieval:
self.speak("[retrieved]:" + retrieved_docs_to_string)

# prepare prompt
prompt = self.model.format(
Msg(
name="system",
role="system",
content=self.sys_prompt,
),
# {"role": "system", "content": retrieved_docs_to_string},
self.memory.get_memory(
recent_n=self.recent_n_mem_for_retrieve,
),
Msg(
name="user",
role="user",
content="Context: " + retrieved_docs_to_string,
),
)

# call llm and generate response
response = self.model(prompt).text
msg = Msg(self.name, response, "assistant")

# Print/speak the message in this agent's voice
self.speak(msg)

if self.memory:
# Record the message in memory
self.memory.add(msg)

return msg
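
For orientation, here is a minimal end-to-end sketch of how LangChainAgent is meant to be wired to a LangChain-backed knowledge. This is hypothetical: the config paths are placeholders, and the KnowledgeBank constructor and equip signatures are assumed from the example script above.

import agentscope
from agentscope.agents import LangChainAgent
from agentscope.message import Msg
from agentscope.rag import KnowledgeBank  # import path assumed

agentscope.init(model_configs="./configs/model_config.json")  # placeholder path

# Mirror the commented-out block in rag_example.py: register a
# LangChain-backed knowledge in the bank.
knowledge_bank = KnowledgeBank(configs="./configs/lc_knowledge_config.json")  # file name assumed
knowledge_bank.add_data_as_knowledge(
    knowledge_id="agentscope_tutorial_rag",
    emb_model_name="qwen_emb_config",
    data_dirs_and_types={
        "../../docs/sphinx_doc/en/source/tutorial": ["*.md"],
    },
    backend_engine="langchain",
)

agent = LangChainAgent(
    name="Tutorial-Assistant",
    sys_prompt="You're an assistant helping new users to use AgentScope.",
    model_config_name="qwen_config",
    knowledge_id_list=["agentscope_tutorial_rag"],
    similarity_top_k=5,
)
knowledge_bank.equip(agent, agent.knowledge_id_list)  # signature assumed

reply = agent(Msg(name="user", content="How do I create an agent?", role="user"))
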