From f762dd69e22ef750b956db3d3d81452cbd3841c9 Mon Sep 17 00:00:00 2001
From: ZiTao-Li <135263265+ZiTao-Li@users.noreply.github.com>
Date: Fri, 29 Mar 2024 16:39:21 +0800
Subject: [PATCH] Adding RAG example; [TODO] Abstract a RAG module and add it into repository (#64)

---
 README.md                                     |   1 +
 README_ZH.md                                  |   1 +
 .../conversation_with_RAG_agents/README.md    |  57 +++
 .../agent_config.json                         |  79 +++++
 .../rag/__init__.py                           |  18 +
 .../rag/langchain_rag.py                      | 208 +++++++++++
 .../rag/llama_index_rag.py                    | 320 +++++++++++++++++
 .../conversation_with_RAG_agents/rag/rag.py   | 118 +++++++
 .../rag_agents.py                             | 332 ++++++++++++++++++
 .../rag_example.py                            |  64 ++++
 10 files changed, 1198 insertions(+)
 create mode 100644 examples/conversation_with_RAG_agents/README.md
 create mode 100644 examples/conversation_with_RAG_agents/agent_config.json
 create mode 100644 examples/conversation_with_RAG_agents/rag/__init__.py
 create mode 100644 examples/conversation_with_RAG_agents/rag/langchain_rag.py
 create mode 100644 examples/conversation_with_RAG_agents/rag/llama_index_rag.py
 create mode 100644 examples/conversation_with_RAG_agents/rag/rag.py
 create mode 100644 examples/conversation_with_RAG_agents/rag_agents.py
 create mode 100644 examples/conversation_with_RAG_agents/rag_example.py

diff --git a/README.md b/README.md
index dee94f5d7..2c68bb324 100644
--- a/README.md
+++ b/README.md
@@ -107,6 +107,7 @@ the following libraries.
   - [Self-Organizing Conversation](./examples/conversation_self_organizing)
   - [Basic Conversation with LangChain library](./examples/conversation_with_langchain)
   - [Conversation with ReAct Agent](./examples/conversation_with_react_agent)
+  - [Conversation with RAG Agent](./examples/conversation_with_RAG_agents)

 - Game
   - [Gomoku](./examples/game_gomoku)
diff --git a/README_ZH.md b/README_ZH.md
index 125c684b4..53a542c25 100644
--- a/README_ZH.md
+++ b/README_ZH.md
@@ -95,6 +95,7 @@ AgentScope支持使用以下库快速部署本地模型服务。
   - [智能体自组织的对话](./examples/conversation_self_organizing)
   - [兼容LangChain的基础对话](./examples/conversation_with_langchain)
   - [与ReAct智能体对话](./examples/conversation_with_react_agent)
+  - [与RAG智能体对话](./examples/conversation_with_RAG_agents)

 - 游戏
   - [五子棋](./examples/game_gomoku)
diff --git a/examples/conversation_with_RAG_agents/README.md b/examples/conversation_with_RAG_agents/README.md
new file mode 100644
index 000000000..5b08379a4
--- /dev/null
+++ b/examples/conversation_with_RAG_agents/README.md
@@ -0,0 +1,57 @@
# AgentScope Consultants: a Multi-Agent RAG Application

* **What is this example about?**
With the provided implementation and configuration,
you will obtain three different agents that can help you answer different questions about AgentScope.

* **What is this example for?** This example shows how easily an agent with retrieval-augmented generation (RAG)
capability can be built.

**Notice:** This example is a Beta version of the AgentScope RAG agent. A formal version will soon be added to `src/agentscope/agents`, but it may be subject to change.

## Prerequisites
* **Cloning repo:** This example requires cloning the whole AgentScope repository locally.
* **Packages:** This example is built on the LlamaIndex package. Thus, some packages need to be installed before running the example.
  ```bash
  pip install llama-index tree_sitter tree-sitter-languages
  ```
* **Model APIs:** This example uses DashScope APIs, so an API key for DashScope is also needed.
  ```bash
  export DASHSCOPE_API_KEY='YOUR_API_KEY'
  ```

**Note:** This example has been tested with the `dashscope_chat` and `dashscope_text_embedding` model wrappers, using the `qwen-max` and `text-embedding-v2` models.
However, you are welcome to replace the DashScope language and embedding model wrappers or models with others you would like to test.

## Start AgentScope Consultants
* **Terminal:** The simplest way to run AgentScope Consultants is in a terminal.
  ```bash
  python ./rag_example.py
  ```
  Setting `log_retrieval` to `false` in `agent_config.json` hides the retrieved information and shows only the agents' answers.

* **AS studio:** If you prefer a more organized, cleaner UI, you can also run the example with our `as_studio`.
  ```bash
  as_studio ./rag_example.py
  ```

### Customizing AgentScope Consultants into other consultants
After running the example, you may notice that it consists of three RAG agents:
* `AgentScope Tutorial Assistant`: answers questions based on the AgentScope tutorials (Markdown files).
* `AgentScope Framework Code Assistant`: answers questions based on the AgentScope code base (Python files).
* `Summarize Assistant`: summarizes the answers from the above two agents.

These agents can be configured to answer questions based on other GitHub repositories by simply modifying the `input_dir` fields in `agent_config.json`.

For more advanced customization, the following details may be helpful.

**RAG modules:** In AgentScope, RAG modules are abstracted to provide three basic functions: `load_data`, `store_and_index` and `retrieve`. Refer to `src/agentscope/rag` for more details.

**RAG configs:** In the example configuration (the `rag_config` field), all parameters are optional. If you want to customize them, the following may help:
* `load_data`: contains all parameters for the `rag.load_data` function.
Since `load_data` accepts a data loader object `loader`, the `loader` entry in the config needs `"create_object": true` so that an internal parser can create a LlamaIndex data loader object.
The loader object is an instance of `class` in module `module`, with initialization parameters in `init_args`.

* `store_and_index`: contains all parameters for the `rag.store_and_index` function.
For example, you can pass `vector_store` and `retriever` configurations in a similar way to the `loader` mentioned above.
For the `transformations` parameter, you can pass a list of dicts, each of which corresponds to building a `NodeParser`-kind of preprocessor in LlamaIndex.
\ No newline at end of file
diff --git a/examples/conversation_with_RAG_agents/agent_config.json b/examples/conversation_with_RAG_agents/agent_config.json
new file mode 100644
index 000000000..fc0a23c12
--- /dev/null
+++ b/examples/conversation_with_RAG_agents/agent_config.json
@@ -0,0 +1,79 @@
[
    {
        "class": "LlamaIndexAgent",
        "args": {
            "name": "AgentScope Tutorial Assistant",
            "sys_prompt": "You're a helpful assistant. You need to generate answers based on the provided context.",
            "model_config_name": "qwen_config",
            "emb_model_config_name": "qwen_emb_config",
            "rag_config": {
                "load_data": {
                    "loader": {
                        "create_object": true,
                        "module": "llama_index.core",
                        "class": "SimpleDirectoryReader",
                        "init_args": {
                            "input_dir": "../../docs/sphinx_doc/en/source/tutorial/",
                            "required_exts": [".md"]
                        }
                    }
                },
                "chunk_size": 2048,
                "chunk_overlap": 40,
                "similarity_top_k": 10,
                "log_retrieval": false,
                "recent_n_mem": 1
            }
        }
    },
    {
        "class": "LlamaIndexAgent",
        "args": {
            "name": "AgentScope Framework Code Assistant",
            "sys_prompt": "You're a helpful assistant about coding. You are very familiar with the framework code of AgentScope.",
            "model_config_name": "qwen_config",
            "emb_model_config_name": "qwen_emb_config",
            "rag_config": {
                "load_data": {
                    "loader": {
                        "create_object": true,
                        "module": "llama_index.core",
                        "class": "SimpleDirectoryReader",
                        "init_args": {
                            "input_dir": "../../src/agentscope",
                            "recursive": true,
                            "required_exts": [".py"]
                        }
                    }
                },
                "store_and_index": {
                    "transformations": [
                        {
                            "create_object": true,
                            "module": "llama_index.core.node_parser",
                            "class": "CodeSplitter",
                            "init_args": {
                                "language": "python",
                                "chunk_lines": 100
                            }
                        }
                    ]
                },
                "chunk_size": 2048,
                "chunk_overlap": 40,
                "similarity_top_k": 10,
                "log_retrieval": false,
                "recent_n_mem": 1
            }
        }
    },
    {
        "class": "DialogAgent",
        "args": {
            "name": "Summarize Assistant",
            "sys_prompt": "You are a helpful assistant that can summarize the answers of the previous two messages.",
            "model_config_name": "qwen_config",
            "use_memory": true
        }
    }
]
\ No newline at end of file
diff --git a/examples/conversation_with_RAG_agents/rag/__init__.py b/examples/conversation_with_RAG_agents/rag/__init__.py
new file mode 100644
index 000000000..3c8f48882
--- /dev/null
+++ b/examples/conversation_with_RAG_agents/rag/__init__.py
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
""" Import all RAG related modules in the package.
""" +from .rag import RAGBase + +from .llama_index_rag import LlamaIndexRAG + + +try: + from .langchain_rag import LangChainRAG +except Exception: + LangChainRAG = None # type: ignore # NOQA + + +__all__ = [ + "RAGBase", + "LlamaIndexRAG", + "LangChainRAG", +] diff --git a/examples/conversation_with_RAG_agents/rag/langchain_rag.py b/examples/conversation_with_RAG_agents/rag/langchain_rag.py new file mode 100644 index 000000000..36a329547 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag/langchain_rag.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +""" +This module is integrate the LangChain RAG model into our AgentScope package +""" + + +from typing import Any, Optional, Union + +try: + from langchain_core.vectorstores import VectorStore + from langchain_core.documents import Document + from langchain_core.embeddings import Embeddings + from langchain_community.document_loaders.base import BaseLoader + from langchain_community.vectorstores import Chroma + from langchain_text_splitters.base import TextSplitter + from langchain_text_splitters import CharacterTextSplitter +except ImportError: + VectorStore = None + Document = None + Embeddings = None + BaseLoader = None + Chroma = None + TextSplitter = None + CharacterTextSplitter = None + +from examples.conversation_with_RAG_agents.rag import RAGBase +from examples.conversation_with_RAG_agents.rag.rag import ( + DEFAULT_CHUNK_OVERLAP, + DEFAULT_CHUNK_SIZE, +) +from agentscope.models import ModelWrapperBase + + +class _LangChainEmbModel(Embeddings): + """ + Dummy wrapper to convert the ModelWrapperBase embedding model + to a LanguageChain RAG model + """ + + def __init__(self, emb_model: ModelWrapperBase) -> None: + """ + Dummy wrapper + Args: + emb_model (ModelWrapperBase): embedding model of + ModelWrapperBase type + """ + self._emb_model_wrapper = emb_model + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """ + Wrapper function for embedding list of documents + Args: + texts (list[str]): list of texts to be embedded + """ + results = [ + list(self._emb_model_wrapper(t).embedding[0]) for t in texts + ] + return results + + def embed_query(self, text: str) -> list[float]: + """ + Wrapper function for embedding a single query + Args: + text (str): query to be embedded + """ + return list(self._emb_model_wrapper(text).embedding[0]) + + +class LangChainRAG(RAGBase): + """ + This class is a wrapper around the LangChain RAG. 
+ """ + + def __init__( + self, + model: Optional[ModelWrapperBase], + emb_model: Union[ModelWrapperBase, Embeddings, None], + config: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """ + Initializes the LangChainRAG + Args: + model (ModelWrapperBase): + The language model used for final synthesis + emb_model ( Union[ModelWrapperBase, Embeddings, None]): + The embedding model used for generate embeddings + config (dict): + The additional configuration for llama index rag + """ + super().__init__(model, emb_model, **kwargs) + + self.loader = None + self.splitter = None + self.retriever = None + self.vector_store = None + + if VectorStore is None: + raise ImportError( + "Please install LangChain RAG packages to use LangChain RAG.", + ) + + self.config = config or {} + if isinstance(emb_model, ModelWrapperBase): + self.emb_model = _LangChainEmbModel(emb_model) + elif isinstance(emb_model, Embeddings): + self.emb_model = emb_model + else: + raise TypeError( + f"Embedding model does not support {type(self.emb_model)}.", + ) + + def load_data( + self, + loader: BaseLoader, + query: Optional[Any] = None, + **kwargs: Any, + ) -> list[Document]: + # pylint: disable=unused-argument + """ + Loading data from a directory + Args: + loader (BaseLoader): + accepting a LangChain loader instance + query (str): + accepting a query, LangChain does not rely on this + Returns: + list[Document]: a list of documents loaded + """ + self.loader = loader + docs = self.loader.load() + return docs + + def store_and_index( + self, + docs: Any, + vector_store: Optional[VectorStore] = None, + splitter: Optional[TextSplitter] = None, + **kwargs: Any, + ) -> Any: + # pylint: disable=unused-argument + """ + Preprocessing the loaded documents. + Args: + docs (Any): + documents to be processed + vector_store (Optional[VectorStore]): + vector store in LangChain RAG + splitter (Optional[TextSplitter]): + optional, specifies the splitter to preprocess + the documents + + Returns: + None + + In LlamaIndex terms, an Index is a data structure composed + of Document objects, designed to enable querying by an LLM. 
+ For example: + 1) preprocessing documents with + 2) generate embedding, + 3) store the embedding-content to vdb + """ + self.splitter = splitter or CharacterTextSplitter( + chunk_size=self.config.get("chunk_size", DEFAULT_CHUNK_SIZE), + chunk_overlap=self.config.get( + "chunk_overlap", + DEFAULT_CHUNK_OVERLAP, + ), + ) + all_splits = self.splitter.split_documents(docs) + + # indexing the chunks and store them into the vector store + if vector_store is None: + vector_store = Chroma() + self.vector_store = vector_store.from_documents( + documents=all_splits, + embedding=self.emb_model, + ) + + # build retriever + search_type = self.config.get("search_type", "similarity") + self.retriever = self.vector_store.as_retriever( + search_type=search_type, + search_kwargs={ + "k": self.config.get("similarity_top_k", 6), + }, + ) + + def retrieve(self, query: Any, to_list_strs: bool = False) -> list[Any]: + """ + This is a basic retrieve function with LangChain APIs + Args: + query: query is expected to be a question in string + + Returns: + list of answers + + More advanced retriever can refer to + https://python.langchain.com/docs/modules/data_connection/retrievers/ + """ + + retrieved_docs = self.retriever.invoke(query) + if to_list_strs: + results = [] + for doc in retrieved_docs: + results.append(doc.page_content) + return results + return retrieved_docs diff --git a/examples/conversation_with_RAG_agents/rag/llama_index_rag.py b/examples/conversation_with_RAG_agents/rag/llama_index_rag.py new file mode 100644 index 000000000..8756856ff --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag/llama_index_rag.py @@ -0,0 +1,320 @@ +# -*- coding: utf-8 -*- +""" +This module is an integration of the Llama index RAG +into AgentScope package +""" + +from typing import Any, Optional, List, Union +from loguru import logger + +try: + from llama_index.core.readers.base import BaseReader + from llama_index.core.base.base_retriever import BaseRetriever + from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding + from llama_index.core.ingestion import IngestionPipeline + from llama_index.core.vector_stores.types import ( + BasePydanticVectorStore, + VectorStore, + ) + from llama_index.core.bridge.pydantic import PrivateAttr + from llama_index.core.node_parser.interface import NodeParser + from llama_index.core.node_parser import SentenceSplitter + from llama_index.core import ( + VectorStoreIndex, + ) +except ImportError: + BaseReader, BaseRetriever = None, None + BaseEmbedding, Embedding = None, None + IngestionPipeline, BasePydanticVectorStore, VectorStore = None, None, None + NodeParser, SentenceSplitter = None, None + VectorStoreIndex = None + PrivateAttr = None + +from rag import RAGBase +from rag.rag import ( + DEFAULT_CHUNK_SIZE, + DEFAULT_CHUNK_OVERLAP, + DEFAULT_TOP_K, +) +from agentscope.models import ModelWrapperBase + + +class _EmbeddingModel(BaseEmbedding): + """ + wrapper for ModelWrapperBase to an embedding model can be used + in Llama Index pipeline. 
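
    A rough usage sketch, assuming an embedding model config named
    `qwen_emb_config` has been registered via `agentscope.init` as in this
    example:

        emb_model = _EmbeddingModel(
            load_model_by_config_name("qwen_emb_config"),
        )
        vector = emb_model.get_text_embedding("hello world")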
+ """ + + _emb_model_wrapper: ModelWrapperBase = PrivateAttr() + + def __init__( + self, + emb_model: ModelWrapperBase, + embed_batch_size: int = 1, + ) -> None: + """ + Dummy wrapper to convert a ModelWrapperBase to llama Index + embedding model + + Args: + emb_model (ModelWrapperBase): embedding model in ModelWrapperBase + embed_batch_size (int): batch size, defaults to 1 + """ + super().__init__( + model_name="Temporary_embedding_wrapper", + embed_batch_size=embed_batch_size, + ) + self._emb_model_wrapper = emb_model + + def _get_query_embedding(self, query: str) -> List[float]: + """ + get embedding for query + Args: + query (str): query to be embedded + """ + # Note: AgentScope embedding model wrapper returns list of embedding + return list(self._emb_model_wrapper(query).embedding[0]) + + def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]: + """ + get embedding for list of strings + Args: + texts ( List[str]): texts to be embedded + """ + results = [ + list(self._emb_model_wrapper(t).embedding[0]) for t in texts + ] + return results + + def _get_text_embedding(self, text: str) -> Embedding: + """ + get embedding for a single string + Args: + text (str): texts to be embedded + """ + return list(self._emb_model_wrapper(text).embedding[0]) + + # TODO: use proper async methods, but depends on model wrapper + async def _aget_query_embedding(self, query: str) -> List[float]: + """The asynchronous version of _get_query_embedding.""" + return self._get_query_embedding(query) + + async def _aget_text_embedding(self, text: str) -> List[float]: + """Asynchronously get text embedding.""" + return self._get_text_embedding(text) + + async def _aget_text_embeddings( + self, + texts: List[str], + ) -> List[List[float]]: + """Asynchronously get text embeddings.""" + return self._get_text_embeddings(texts) + + +class LlamaIndexRAG(RAGBase): + """ + This class is a wrapper with the llama index RAG. + """ + + def __init__( + self, + model: Optional[ModelWrapperBase], + emb_model: Union[ModelWrapperBase, BaseEmbedding, None] = None, + config: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """ + RAG component based on llama index. + Args: + model (ModelWrapperBase): + The language model used for final synthesis + emb_model (Optional[ModelWrapperBase]): + The embedding model used for generate embeddings + config (dict): + The additional configuration for llama index rag + """ + super().__init__(model, emb_model, config, **kwargs) + self.retriever = None + self.index = None + self.persist_dir = kwargs.get("persist_dir", "/") + self.emb_model = emb_model + print(self.config) + + # ensure the emb_model is compatible with LlamaIndex + if isinstance(emb_model, ModelWrapperBase): + self.emb_model = _EmbeddingModel(emb_model) + elif isinstance(self.emb_model, BaseEmbedding): + pass + else: + raise TypeError( + f"Embedding model does not support {type(self.emb_model)}.", + ) + + def load_data( + self, + loader: BaseReader, + query: Optional[str] = None, + **kwargs: Any, + ) -> Any: + """ + Accept a loader, loading the desired data (no chunking) + Args: + loader (BaseReader): + object to load data, expected be an instance of class + inheriting from BaseReader in llama index. + query (Optional[str]): + optional, used when the data is in a database. + + Returns: + Any: loaded documents + + Example 1: use simple directory loader to load general documents, + including Markdown, PDFs, Word documents, PowerPoint decks, images, + audio and video. 
+ ``` + load_data_to_chunks( + loader=SimpleDirectoryReader("./data") + ) + ``` + + Example 2: use SQL loader + ``` + load_data_to_chunks( + DatabaseReader( + scheme=os.getenv("DB_SCHEME"), + host=os.getenv("DB_HOST"), + port=os.getenv("DB_PORT"), + user=os.getenv("DB_USER"), + password=os.getenv("DB_PASS"), + dbname=os.getenv("DB_NAME"), + ), + query = "SELECT * FROM users" + ) + ``` + """ + if query is None: + documents = loader.load_data() + else: + documents = loader.load_data(query) + logger.info(f"loaded {len(documents)} documents") + return documents + + def store_and_index( + self, + docs: Any, + vector_store: Union[BasePydanticVectorStore, VectorStore, None] = None, + retriever: Optional[BaseRetriever] = None, + transformations: Optional[list[NodeParser]] = None, + **kwargs: Any, + ) -> Any: + """ + Preprocessing the loaded documents. + Args: + docs (Any): + documents to be processed, usually expected to be in + llama index Documents. + vector_store (Union[BasePydanticVectorStore, VectorStore, None]): + vector store in llama index + retriever (Optional[BaseRetriever]): + optional, specifies the retriever in llama index to be used + transformations (Optional[list[NodeParser]]): + optional, specifies the transformations (operators) to + process documents (e.g., split the documents into smaller + chunks) + + Return: + Any: return the index of the processed document + + In LlamaIndex terms, an Index is a data structure composed + of Document objects, designed to enable querying by an LLM. + For example: + 1) preprocessing documents with + 2) generate embedding, + 3) store the embedding-content to vdb + """ + # build and run preprocessing pipeline + if transformations is None: + transformations = [ + SentenceSplitter( + chunk_size=self.config.get( + "chunk_size", + DEFAULT_CHUNK_SIZE, + ), + chunk_overlap=self.config.get( + "chunk_overlap", + DEFAULT_CHUNK_OVERLAP, + ), + ), + ] + + # adding embedding model as the last step of transformation + # https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/root.html + transformations.append(self.emb_model) + + if vector_store is not None: + pipeline = IngestionPipeline( + transformations=transformations, + vector_store=vector_store, + ) + _ = pipeline.run(docs) + self.index = VectorStoreIndex.from_vector_store(vector_store) + else: + # No vector store is provide, use simple in memory + pipeline = IngestionPipeline( + transformations=transformations, + ) + nodes = pipeline.run(documents=docs) + self.index = VectorStoreIndex( + nodes=nodes, + embed_model=self.emb_model, + ) + + # set the retriever + if retriever is None: + logger.info( + f'{self.config.get("similarity_top_k", DEFAULT_TOP_K)}', + ) + self.retriever = self.index.as_retriever( + embed_model=self.emb_model, + similarity_top_k=self.config.get( + "similarity_top_k", + DEFAULT_TOP_K, + ), + **kwargs, + ) + else: + self.retriever = retriever + return self.index + + def set_retriever(self, retriever: BaseRetriever) -> None: + """ + Reset the retriever if necessary. + Args: + retriever (BaseRetriever): passing a retriever in llama index. 
+ """ + self.retriever = retriever + + def retrieve(self, query: str, to_list_strs: bool = False) -> list[Any]: + """ + This is a basic retrieve function + Args: + query (str): + query is expected to be a question in string + to_list_strs (book): + whether returns the list of strings; + if False, return NodeWithScore + + Return: + list[Any]: list of str or NodeWithScore + + + More advanced query processing can refer to + https://docs.llamaindex.ai/en/stable/examples/query_transformations/query_transform_cookbook.html + """ + retrieved = self.retriever.retrieve(str(query)) + if to_list_strs: + results = [] + for node in retrieved: + results.append(node.get_text()) + return results + return retrieved diff --git a/examples/conversation_with_RAG_agents/rag/rag.py b/examples/conversation_with_RAG_agents/rag/rag.py new file mode 100644 index 000000000..0de27ca37 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag/rag.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +""" +Base class module for retrieval augmented generation (RAG). +To accommodate the RAG process of different packages, +we abstract the RAG process into four stages: +- data loading: loading data into memory for following processing; +- data indexing and storage: document chunking, embedding generation, +and off-load the data into VDB; +- data retrieval: taking a query and return a batch of documents or +document chunks; +- post-processing of the retrieved data: use the retrieved data to +generate an answer. +""" + +from abc import ABC, abstractmethod +from typing import Any, Optional + +from agentscope.models import ModelWrapperBase + +DEFAULT_CHUNK_SIZE = 1024 +DEFAULT_CHUNK_OVERLAP = 20 +DEFAULT_TOP_K = 5 + + +class RAGBase(ABC): + """ + Base class for RAG, CANNOT be instantiated directly + """ + + def __init__( + self, + model: Optional[ModelWrapperBase], + emb_model: Any = None, + config: Optional[dict] = None, + **kwargs: Any, + ) -> None: + # pylint: disable=unused-argument + self.postprocessing_model = model + self.emb_model = emb_model + self.config = config or {} + + @abstractmethod + def load_data( + self, + loader: Any, + query: Any, + **kwargs: Any, + ) -> Any: + """ + Load data (documents) from disk to memory and chunking them + Args: + loader (Any): data loader, depending on the package + query (str): query for getting data from DB + + Returns: + Any: loaded documents + """ + + @abstractmethod + def store_and_index( + self, + docs: Any, + vector_store: Any, + **kwargs: Any, + ) -> Any: + """ + Store and index the documents. + Args: + docs (Any): + documents to be processed, stored and indexed + vector_store (Any): + vector store to store the index and/or documents + + Returns: + Any: can be indices, depending on the RAG package + + preprocessing the loaded documents, for example: + 1) chunking, + 2) generate embedding, + 3) store the embedding-content to vdb + """ + + @abstractmethod + def retrieve(self, query: Any, to_list_strs: bool = False) -> list[Any]: + """ + retrieve list of content from vdb to memory + Args: + query (Any): query to retrieve + to_list_strs (bool): whether return a list of str + + Returns: + return a list with retrieved documents (in strings) + """ + + def post_processing( + self, + retrieved_docs: list[str], + prompt: str, + **kwargs: Any, + ) -> Any: + """ + A default solution for post-processing function, generates answer + based on the retrieved documents. 
+ Args: + retrieved_docs (list[str]): + list of retrieved documents + prompt (str): + prompt for LLM generating answer with the retrieved documents + + Returns: + Any: a synthesized answer from LLM with retrieved documents + + Example: + self.postprocessing_model(prompt.format(retrieved_docs)) + """ + assert self.postprocessing_model + prompt = prompt.format("\n".join(retrieved_docs)) + return self.postprocessing_model(prompt, **kwargs).text diff --git a/examples/conversation_with_RAG_agents/rag_agents.py b/examples/conversation_with_RAG_agents/rag_agents.py new file mode 100644 index 000000000..101b2e305 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag_agents.py @@ -0,0 +1,332 @@ +# -*- coding: utf-8 -*- +""" +This example shows how to build an agent with RAG +with LlamaIndex. + +Notice, this is a Beta version of RAG agent. +""" + +from abc import ABC, abstractmethod +from typing import Optional, Any +import importlib +from loguru import logger + +from rag import RAGBase, LlamaIndexRAG + +from agentscope.agents.agent import AgentBase +from agentscope.message import Msg +from agentscope.models import load_model_by_config_name + + +class RAGAgentBase(AgentBase, ABC): + """ + Base class for RAG agents + """ + + def __init__( + self, + name: str, + sys_prompt: str, + model_config_name: str, + emb_model_config_name: str, + memory_config: Optional[dict] = None, + rag_config: Optional[dict] = None, + ) -> None: + """ + Initialize the RAG base agent + Args: + name (str): + the name for the agent. + sys_prompt (str): + system prompt for the RAG agent. + model_config_name (str): + language model for the agent. + emb_model_config_name (str): + embedding model for the agent. + memory_config (dict): + memory configuration. + rag_config (dict): + config for RAG. It contains most of the + important parameters for RAG modules. If not provided, + the default setting will be used. + Examples can refer to children classes. + """ + super().__init__( + name=name, + sys_prompt=sys_prompt, + model_config_name=model_config_name, + use_memory=True, + memory_config=memory_config, + ) + # setup embedding model used in RAG + self.emb_model = load_model_by_config_name(emb_model_config_name) + + self.rag_config = rag_config or {} + if "log_retrieval" not in self.rag_config: + self.rag_config["log_retrieval"] = True + + # use LlamaIndexAgent OR LangChainAgent + self.rag = self.init_rag() + + @abstractmethod + def init_rag(self) -> RAGBase: + """initialize RAG with configuration""" + + def _prepare_args_from_config( + self, + config: dict, + ) -> Any: + """ + Helper function to build args for the two functions: + rag.load_data(...) and rag.store_and_index(docs, ...) + in RAG classes. + Args: + config (dict): a dictionary containing configurations + + Returns: + Any: an object that is parsed/built to be an element + of input to the function of RAG module. 
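
        For example, a loader config like the one in `agent_config.json`,

            {
                "create_object": true,
                "module": "llama_index.core",
                "class": "SimpleDirectoryReader",
                "init_args": {"input_dir": "../../src/agentscope", "recursive": true}
            }

        is parsed into the object
        `SimpleDirectoryReader(input_dir="../../src/agentscope", recursive=True)`.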
+ """ + if not isinstance(config, dict): + return config + + if "create_object" in config: + # if a term in args is a object, + # recursively create object with args from config + module_name = config.get("module", "") + class_name = config.get("class", "") + init_args = config.get("init_args", {}) + try: + cur_module = importlib.import_module(module_name) + cur_class = getattr(cur_module, class_name) + init_args = self._prepare_args_from_config(init_args) + logger.info( + f"load and build object{cur_module, cur_class, init_args}", + ) + return cur_class(**init_args) + except ImportError as exc_inner: + logger.error( + f"Fail to load class {class_name} " + f"from module {module_name}", + ) + raise ImportError( + f"Fail to load class {class_name} " + f"from module {module_name}", + ) from exc_inner + else: + prepared_args = {} + for key, value in config.items(): + if isinstance(value, list): + prepared_args[key] = [] + for c in value: + prepared_args[key].append( + self._prepare_args_from_config(c), + ) + elif isinstance(value, dict): + prepared_args[key] = self._prepare_args_from_config(value) + else: + prepared_args[key] = value + return prepared_args + + def reply( + self, + x: dict = None, + ) -> dict: + """ + Reply function of the RAG agent. + Processes the input data, + 1) use the input data to retrieve with RAG function; + 2) generates a prompt using the current memory and system + prompt; + 3) invokes the language model to produce a response. The + response is then formatted and added to the dialogue memory. + + Args: + x (`dict`, defaults to `None`): + A dictionary representing the user's input to the agent. This + input is added to the memory if provided. Defaults to + None. + Returns: + A dictionary representing the message generated by the agent in + response to the user's input. + """ + retrieved_docs_to_string = "" + # record the input if needed + if self.memory: + self.memory.add(x) + # in case no input is provided (e.g., in msghub), + # use the memory as query + history = self.memory.get_memory( + recent_n=self.rag_config.get("recent_n_mem", 1), + ) + query = ( + "/n".join( + [msg["content"] for msg in history], + ) + if isinstance(history, list) + else str(history) + ) + elif x is not None: + query = x["content"] + else: + query = "" + + if len(query) > 0: + # when content has information, do retrieval + retrieved_docs = self.rag.retrieve(query, to_list_strs=True) + for content in retrieved_docs: + retrieved_docs_to_string += "\n>>>> " + content + + if self.rag_config["log_retrieval"]: + self.speak("[retrieved]:" + retrieved_docs_to_string) + + # prepare prompt + prompt = self.model.format( + Msg( + name="system", + role="system", + content=self.sys_prompt, + ), + # {"role": "system", "content": retrieved_docs_to_string}, + self.memory.get_memory( + recent_n=self.rag_config.get("recent_n_mem", 1), + ), + Msg( + name="user", + role="user", + content="Context: " + retrieved_docs_to_string, + ), + ) + + # call llm and generate response + response = self.model(prompt).text + msg = Msg(self.name, response) + + # Print/speak the message in this agent's voice + self.speak(msg) + + if self.memory: + # Record the message in memory + self.memory.add(msg) + + return msg + + +class LlamaIndexAgent(RAGAgentBase): + """ + A LlamaIndex agent build on LlamaIndex. 
+ """ + + def __init__( + self, + name: str, + sys_prompt: str, + model_config_name: str, + emb_model_config_name: str = None, + memory_config: Optional[dict] = None, + rag_config: Optional[dict] = None, + ) -> None: + """ + Initialize the RAG LlamaIndexAgent + Args: + name (str): + the name for the agent + sys_prompt (str): + system prompt for the RAG agent + model_config_name (str): + language model for the agent + emb_model_config_name (str): + embedding model for the agent + memory_config (dict): + memory configuration + rag_config (dict): + config for RAG. It contains the parameters for + RAG modules functions: + rag.load_data(...) and rag.store_and_index(docs, ...) + If not provided, the default setting will be used. + An example of the config for retrieving code files + is as following: + + "rag_config": { + "load_data": { + "loader": { + "create_object": true, + "module": "llama_index.core", + "class": "SimpleDirectoryReader", + "init_args": { + "input_dir": "path/to/data", + "recursive": true + ... + } + } + }, + "store_and_index": { + "transformations": [ + { + "create_object": true, + "module": "llama_index.core.node_parser", + "class": "CodeSplitter", + "init_args": { + "language": "python", + "chunk_lines": 100 + } + } + ] + }, + "chunk_size": 2048, + "chunk_overlap": 40, + "similarity_top_k": 10, + "log_retrieval": true, + "recent_n_mem": 1 + } + """ + super().__init__( + name=name, + sys_prompt=sys_prompt, + model_config_name=model_config_name, + emb_model_config_name=emb_model_config_name, + memory_config=memory_config, + rag_config=rag_config, + ) + + def init_rag(self) -> LlamaIndexRAG: + # dynamic loading loader + # init rag related attributes + rag = LlamaIndexRAG( + model=self.model, + emb_model=self.emb_model, + config=self.rag_config, + ) + # load the document to memory + # Feed the AgentScope tutorial documents, so that + # the agent can answer questions related to AgentScope! + if "load_data" in self.rag_config: + load_data_args = self._prepare_args_from_config( + self.rag_config["load_data"], + ) + else: + try: + from llama_index.core import SimpleDirectoryReader + except ImportError as exc_inner: + raise ImportError( + " LlamaIndexAgent requires llama-index to be install." + "Please run `pip install llama-index`", + ) from exc_inner + load_data_args = { + "loader": SimpleDirectoryReader(self.config["data_path"]), + } + logger.info(f"rag.load_data args: {load_data_args}") + docs = rag.load_data(**load_data_args) + + # store and indexing + if "store_and_index" in self.rag_config: + store_and_index_args = self._prepare_args_from_config( + self.rag_config["store_and_index"], + ) + else: + store_and_index_args = {} + + logger.info(f"store_and_index_args args: {store_and_index_args}") + rag.store_and_index(docs, **store_and_index_args) + + return rag diff --git a/examples/conversation_with_RAG_agents/rag_example.py b/examples/conversation_with_RAG_agents/rag_example.py new file mode 100644 index 000000000..1e2b4c6d3 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag_example.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" +A simple example for conversation between user and +an agent with RAG capability. 
+""" +import json +import os + +from rag_agents import LlamaIndexAgent + +import agentscope +from agentscope.agents import UserAgent +from agentscope.message import Msg +from agentscope.agents import DialogAgent + + +def main() -> None: + """A RAG multi-agent demo""" + agentscope.init( + model_configs=[ + { + "model_type": "dashscope_chat", + "config_name": "qwen_config", + "model_name": "qwen-max", + "api_key": f"{os.environ.get('DASHSCOPE_API_KEY')}", + }, + { + "model_type": "dashscope_text_embedding", + "config_name": "qwen_emb_config", + "model_name": "text-embedding-v2", + "api_key": f"{os.environ.get('DASHSCOPE_API_KEY')}", + }, + ], + ) + + with open("./agent_config.json", "r", encoding="utf-8") as f: + agent_configs = json.load(f) + tutorial_agent = LlamaIndexAgent(**agent_configs[0]["args"]) + code_explain_agent = LlamaIndexAgent(**agent_configs[1]["args"]) + summarize_agent = DialogAgent(**agent_configs[2]["args"]) + + user_agent = UserAgent() + # start the conversation between user and assistant + while True: + x = user_agent() + x.role = "user" # to enforce dashscope requirement on roles + if len(x["content"]) == 0 or str(x["content"]).startswith("exit"): + break + tutorial_response = tutorial_agent(x) + code_explain = code_explain_agent(x) + msg = Msg( + name="user", + role="user", + content=tutorial_response["content"] + + "\n" + + code_explain["content"] + + "\n" + + x["content"], + ) + summarize_agent(msg) + + +if __name__ == "__main__": + main()