From f762dd69e22ef750b956db3d3d81452cbd3841c9 Mon Sep 17 00:00:00 2001
From: ZiTao-Li <135263265+ZiTao-Li@users.noreply.github.com>
Date: Fri, 29 Mar 2024 16:39:21 +0800
Subject: [PATCH] Adding RAG example; [TODO] Abstract a RAG module and add it into repository (#64)

---
 README.md                                     |   1 +
 README_ZH.md                                  |   1 +
 .../conversation_with_RAG_agents/README.md    |  57 +++
 .../agent_config.json                         |  79 +++++
 .../rag/__init__.py                           |  18 +
 .../rag/langchain_rag.py                      | 208 +++++++++++
 .../rag/llama_index_rag.py                    | 320 +++++++++++++++++
 .../conversation_with_RAG_agents/rag/rag.py   | 118 +++++++
 .../rag_agents.py                             | 332 ++++++++++++++++++
 .../rag_example.py                            |  64 ++++
 10 files changed, 1198 insertions(+)
 create mode 100644 examples/conversation_with_RAG_agents/README.md
 create mode 100644 examples/conversation_with_RAG_agents/agent_config.json
 create mode 100644 examples/conversation_with_RAG_agents/rag/__init__.py
 create mode 100644 examples/conversation_with_RAG_agents/rag/langchain_rag.py
 create mode 100644 examples/conversation_with_RAG_agents/rag/llama_index_rag.py
 create mode 100644 examples/conversation_with_RAG_agents/rag/rag.py
 create mode 100644 examples/conversation_with_RAG_agents/rag_agents.py
 create mode 100644 examples/conversation_with_RAG_agents/rag_example.py

diff --git a/README.md b/README.md
index dee94f5d7..2c68bb324 100644
--- a/README.md
+++ b/README.md
@@ -107,6 +107,7 @@ the following libraries.
   - [Self-Organizing Conversation](./examples/conversation_self_organizing)
   - [Basic Conversation with LangChain library](./examples/conversation_with_langchain)
   - [Conversation with ReAct Agent](./examples/conversation_with_react_agent)
+  - [Conversation with RAG Agent](./examples/conversation_with_RAG_agents)

 - Game
   - [Gomoku](./examples/game_gomoku)
diff --git a/README_ZH.md b/README_ZH.md
index 125c684b4..53a542c25 100644
--- a/README_ZH.md
+++ b/README_ZH.md
@@ -95,6 +95,7 @@ AgentScope支持使用以下库快速部署本地模型服务。
   - [智能体自组织的对话](./examples/conversation_self_organizing)
   - [兼容LangChain的基础对话](./examples/conversation_with_langchain)
   - [与ReAct智能体对话](./examples/conversation_with_react_agent)
+  - [与RAG智能体对话](./examples/conversation_with_RAG_agents)

 - 游戏
   - [五子棋](./examples/game_gomoku)
diff --git a/examples/conversation_with_RAG_agents/README.md b/examples/conversation_with_RAG_agents/README.md
new file mode 100644
index 000000000..5b08379a4
--- /dev/null
+++ b/examples/conversation_with_RAG_agents/README.md
@@ -0,0 +1,57 @@
# AgentScope Consultants: a Multi-Agent RAG Application

* **What is this example about?**
With the provided implementation and configuration,
you will obtain three different agents that can help you answer different questions about AgentScope.

* **What is this example for?** This example shows how easily an agent with retrieval-augmented generation (RAG)
capability can be built.

**Notice:** This example is a Beta version of the AgentScope RAG agent. A formal version will soon be added to `src/agentscope/agents`, but it may be subject to change.

## Prerequisites
* **Cloning repo:** This example requires cloning the whole AgentScope repository locally.
* **Packages:** This example is built on the LlamaIndex package. Thus, some packages need to be installed before running the example.
  ```bash
  pip install llama-index tree_sitter tree-sitter-languages
  ```
* **Model APIs:** This example uses DashScope APIs, so an API key for DashScope is also needed.
  ```bash
  export DASHSCOPE_API_KEY='YOUR_API_KEY'
  ```

**Note:** This example has been tested with the `dashscope_chat` and `dashscope_text_embedding` model wrappers, using the `qwen-max` and `text-embedding-v2` models.
However, you are welcome to replace the DashScope language and embedding model wrappers or models with others you would like to test.

## Start AgentScope Consultants
* **Terminal:** The simplest way to run AgentScope Consultants is in a terminal.
  ```bash
  python ./rag_example.py
  ```
  Setting `log_retrieval` to `false` in `agent_config.json` hides the retrieved information and shows only the agents' answers.

* **AS studio:** If you prefer a more organized, cleaner UI, you can also run the example with our `as_studio`.
  ```bash
  as_studio ./rag_example.py
  ```

### Customizing AgentScope Consultants into other consultants
After running the example, you may notice that it consists of three RAG agents:
* `AgentScope Tutorial Assistant`: answers questions based on the AgentScope tutorials (Markdown files).
* `AgentScope Framework Code Assistant`: answers questions based on the AgentScope code base (Python files).
* `Summarize Assistant`: summarizes the answers from the above two agents.

These agents can be configured to answer questions based on other GitHub repositories by simply modifying the `input_dir` fields in `agent_config.json`.

For more advanced customization, the following details may be helpful.

**RAG modules:** In AgentScope, RAG modules are abstracted to provide three basic functions: `load_data`, `store_and_index` and `retrieve`. Refer to `src/agentscope/rag` for more details.

**RAG configs:** In the example configuration (the `rag_config` field), all parameters are optional. If you want to customize them, the following may help:
* `load_data`: contains all parameters for the `rag.load_data` function.
Since `load_data` accepts a data loader object `loader`, the `loader` entry in the config needs `"create_object": true` so that an internal parser can create a LlamaIndex data loader object.
The loader object is an instance of `class` in module `module`, with initialization parameters in `init_args`.

* `store_and_index`: contains all parameters for the `rag.store_and_index` function.
For example, you can pass `vector_store` and `retriever` configurations in a similar way to the `loader` mentioned above.
For the `transformations` parameter, you can pass a list of dicts, each of which corresponds to building a `NodeParser`-kind of preprocessor in LlamaIndex.
\ No newline at end of file
diff --git a/examples/conversation_with_RAG_agents/agent_config.json b/examples/conversation_with_RAG_agents/agent_config.json
new file mode 100644
index 000000000..fc0a23c12
--- /dev/null
+++ b/examples/conversation_with_RAG_agents/agent_config.json
@@ -0,0 +1,79 @@
[
    {
        "class": "LlamaIndexAgent",
        "args": {
            "name": "AgentScope Tutorial Assistant",
            "sys_prompt": "You're a helpful assistant. You need to generate answers based on the provided context.",
            "model_config_name": "qwen_config",
            "emb_model_config_name": "qwen_emb_config",
            "rag_config": {
                "load_data": {
                    "loader": {
                        "create_object": true,
                        "module": "llama_index.core",
                        "class": "SimpleDirectoryReader",
                        "init_args": {
                            "input_dir": "../../docs/sphinx_doc/en/source/tutorial/",
                            "required_exts": [".md"]
                        }
                    }
                },
                "chunk_size": 2048,
                "chunk_overlap": 40,
                "similarity_top_k": 10,
                "log_retrieval": false,
                "recent_n_mem": 1
            }
        }
    },
    {
        "class": "LlamaIndexAgent",
        "args": {
            "name": "AgentScope Framework Code Assistant",
            "sys_prompt": "You're a helpful assistant about coding. You are very familiar with the framework code of AgentScope.",
            "model_config_name": "qwen_config",
            "emb_model_config_name": "qwen_emb_config",
            "rag_config": {
                "load_data": {
                    "loader": {
                        "create_object": true,
                        "module": "llama_index.core",
                        "class": "SimpleDirectoryReader",
                        "init_args": {
                            "input_dir": "../../src/agentscope",
                            "recursive": true,
                            "required_exts": [".py"]
                        }
                    }
                },
                "store_and_index": {
                    "transformations": [
                        {
                            "create_object": true,
                            "module": "llama_index.core.node_parser",
                            "class": "CodeSplitter",
                            "init_args": {
                                "language": "python",
                                "chunk_lines": 100
                            }
                        }
                    ]
                },
                "chunk_size": 2048,
                "chunk_overlap": 40,
                "similarity_top_k": 10,
                "log_retrieval": false,
                "recent_n_mem": 1
            }
        }
    },
    {
        "class": "DialogAgent",
        "args": {
            "name": "Summarize Assistant",
            "sys_prompt": "You are a helpful assistant that can summarize the answers of the previous two messages.",
            "model_config_name": "qwen_config",
            "use_memory": true
        }
    }
]
\ No newline at end of file
diff --git a/examples/conversation_with_RAG_agents/rag/__init__.py b/examples/conversation_with_RAG_agents/rag/__init__.py
new file mode 100644
index 000000000..3c8f48882
--- /dev/null
+++ b/examples/conversation_with_RAG_agents/rag/__init__.py
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*-
""" Import all RAG related modules in the package.
""" +from .rag import RAGBase + +from .llama_index_rag import LlamaIndexRAG + + +try: + from .langchain_rag import LangChainRAG +except Exception: + LangChainRAG = None # type: ignore # NOQA + + +__all__ = [ + "RAGBase", + "LlamaIndexRAG", + "LangChainRAG", +] diff --git a/examples/conversation_with_RAG_agents/rag/langchain_rag.py b/examples/conversation_with_RAG_agents/rag/langchain_rag.py new file mode 100644 index 000000000..36a329547 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag/langchain_rag.py @@ -0,0 +1,208 @@ +# -*- coding: utf-8 -*- +""" +This module is integrate the LangChain RAG model into our AgentScope package +""" + + +from typing import Any, Optional, Union + +try: + from langchain_core.vectorstores import VectorStore + from langchain_core.documents import Document + from langchain_core.embeddings import Embeddings + from langchain_community.document_loaders.base import BaseLoader + from langchain_community.vectorstores import Chroma + from langchain_text_splitters.base import TextSplitter + from langchain_text_splitters import CharacterTextSplitter +except ImportError: + VectorStore = None + Document = None + Embeddings = None + BaseLoader = None + Chroma = None + TextSplitter = None + CharacterTextSplitter = None + +from examples.conversation_with_RAG_agents.rag import RAGBase +from examples.conversation_with_RAG_agents.rag.rag import ( + DEFAULT_CHUNK_OVERLAP, + DEFAULT_CHUNK_SIZE, +) +from agentscope.models import ModelWrapperBase + + +class _LangChainEmbModel(Embeddings): + """ + Dummy wrapper to convert the ModelWrapperBase embedding model + to a LanguageChain RAG model + """ + + def __init__(self, emb_model: ModelWrapperBase) -> None: + """ + Dummy wrapper + Args: + emb_model (ModelWrapperBase): embedding model of + ModelWrapperBase type + """ + self._emb_model_wrapper = emb_model + + def embed_documents(self, texts: list[str]) -> list[list[float]]: + """ + Wrapper function for embedding list of documents + Args: + texts (list[str]): list of texts to be embedded + """ + results = [ + list(self._emb_model_wrapper(t).embedding[0]) for t in texts + ] + return results + + def embed_query(self, text: str) -> list[float]: + """ + Wrapper function for embedding a single query + Args: + text (str): query to be embedded + """ + return list(self._emb_model_wrapper(text).embedding[0]) + + +class LangChainRAG(RAGBase): + """ + This class is a wrapper around the LangChain RAG. 
+ """ + + def __init__( + self, + model: Optional[ModelWrapperBase], + emb_model: Union[ModelWrapperBase, Embeddings, None], + config: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """ + Initializes the LangChainRAG + Args: + model (ModelWrapperBase): + The language model used for final synthesis + emb_model ( Union[ModelWrapperBase, Embeddings, None]): + The embedding model used for generate embeddings + config (dict): + The additional configuration for llama index rag + """ + super().__init__(model, emb_model, **kwargs) + + self.loader = None + self.splitter = None + self.retriever = None + self.vector_store = None + + if VectorStore is None: + raise ImportError( + "Please install LangChain RAG packages to use LangChain RAG.", + ) + + self.config = config or {} + if isinstance(emb_model, ModelWrapperBase): + self.emb_model = _LangChainEmbModel(emb_model) + elif isinstance(emb_model, Embeddings): + self.emb_model = emb_model + else: + raise TypeError( + f"Embedding model does not support {type(self.emb_model)}.", + ) + + def load_data( + self, + loader: BaseLoader, + query: Optional[Any] = None, + **kwargs: Any, + ) -> list[Document]: + # pylint: disable=unused-argument + """ + Loading data from a directory + Args: + loader (BaseLoader): + accepting a LangChain loader instance + query (str): + accepting a query, LangChain does not rely on this + Returns: + list[Document]: a list of documents loaded + """ + self.loader = loader + docs = self.loader.load() + return docs + + def store_and_index( + self, + docs: Any, + vector_store: Optional[VectorStore] = None, + splitter: Optional[TextSplitter] = None, + **kwargs: Any, + ) -> Any: + # pylint: disable=unused-argument + """ + Preprocessing the loaded documents. + Args: + docs (Any): + documents to be processed + vector_store (Optional[VectorStore]): + vector store in LangChain RAG + splitter (Optional[TextSplitter]): + optional, specifies the splitter to preprocess + the documents + + Returns: + None + + In LlamaIndex terms, an Index is a data structure composed + of Document objects, designed to enable querying by an LLM. 
+ For example: + 1) preprocessing documents with + 2) generate embedding, + 3) store the embedding-content to vdb + """ + self.splitter = splitter or CharacterTextSplitter( + chunk_size=self.config.get("chunk_size", DEFAULT_CHUNK_SIZE), + chunk_overlap=self.config.get( + "chunk_overlap", + DEFAULT_CHUNK_OVERLAP, + ), + ) + all_splits = self.splitter.split_documents(docs) + + # indexing the chunks and store them into the vector store + if vector_store is None: + vector_store = Chroma() + self.vector_store = vector_store.from_documents( + documents=all_splits, + embedding=self.emb_model, + ) + + # build retriever + search_type = self.config.get("search_type", "similarity") + self.retriever = self.vector_store.as_retriever( + search_type=search_type, + search_kwargs={ + "k": self.config.get("similarity_top_k", 6), + }, + ) + + def retrieve(self, query: Any, to_list_strs: bool = False) -> list[Any]: + """ + This is a basic retrieve function with LangChain APIs + Args: + query: query is expected to be a question in string + + Returns: + list of answers + + More advanced retriever can refer to + https://python.langchain.com/docs/modules/data_connection/retrievers/ + """ + + retrieved_docs = self.retriever.invoke(query) + if to_list_strs: + results = [] + for doc in retrieved_docs: + results.append(doc.page_content) + return results + return retrieved_docs diff --git a/examples/conversation_with_RAG_agents/rag/llama_index_rag.py b/examples/conversation_with_RAG_agents/rag/llama_index_rag.py new file mode 100644 index 000000000..8756856ff --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag/llama_index_rag.py @@ -0,0 +1,320 @@ +# -*- coding: utf-8 -*- +""" +This module is an integration of the Llama index RAG +into AgentScope package +""" + +from typing import Any, Optional, List, Union +from loguru import logger + +try: + from llama_index.core.readers.base import BaseReader + from llama_index.core.base.base_retriever import BaseRetriever + from llama_index.core.base.embeddings.base import BaseEmbedding, Embedding + from llama_index.core.ingestion import IngestionPipeline + from llama_index.core.vector_stores.types import ( + BasePydanticVectorStore, + VectorStore, + ) + from llama_index.core.bridge.pydantic import PrivateAttr + from llama_index.core.node_parser.interface import NodeParser + from llama_index.core.node_parser import SentenceSplitter + from llama_index.core import ( + VectorStoreIndex, + ) +except ImportError: + BaseReader, BaseRetriever = None, None + BaseEmbedding, Embedding = None, None + IngestionPipeline, BasePydanticVectorStore, VectorStore = None, None, None + NodeParser, SentenceSplitter = None, None + VectorStoreIndex = None + PrivateAttr = None + +from rag import RAGBase +from rag.rag import ( + DEFAULT_CHUNK_SIZE, + DEFAULT_CHUNK_OVERLAP, + DEFAULT_TOP_K, +) +from agentscope.models import ModelWrapperBase + + +class _EmbeddingModel(BaseEmbedding): + """ + wrapper for ModelWrapperBase to an embedding model can be used + in Llama Index pipeline. 
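
    A rough usage sketch, assuming an embedding model config named
    `qwen_emb_config` has been registered via `agentscope.init` as in this
    example:

        emb_model = _EmbeddingModel(
            load_model_by_config_name("qwen_emb_config"),
        )
        vector = emb_model.get_text_embedding("hello world")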
+ """ + + _emb_model_wrapper: ModelWrapperBase = PrivateAttr() + + def __init__( + self, + emb_model: ModelWrapperBase, + embed_batch_size: int = 1, + ) -> None: + """ + Dummy wrapper to convert a ModelWrapperBase to llama Index + embedding model + + Args: + emb_model (ModelWrapperBase): embedding model in ModelWrapperBase + embed_batch_size (int): batch size, defaults to 1 + """ + super().__init__( + model_name="Temporary_embedding_wrapper", + embed_batch_size=embed_batch_size, + ) + self._emb_model_wrapper = emb_model + + def _get_query_embedding(self, query: str) -> List[float]: + """ + get embedding for query + Args: + query (str): query to be embedded + """ + # Note: AgentScope embedding model wrapper returns list of embedding + return list(self._emb_model_wrapper(query).embedding[0]) + + def _get_text_embeddings(self, texts: List[str]) -> List[Embedding]: + """ + get embedding for list of strings + Args: + texts ( List[str]): texts to be embedded + """ + results = [ + list(self._emb_model_wrapper(t).embedding[0]) for t in texts + ] + return results + + def _get_text_embedding(self, text: str) -> Embedding: + """ + get embedding for a single string + Args: + text (str): texts to be embedded + """ + return list(self._emb_model_wrapper(text).embedding[0]) + + # TODO: use proper async methods, but depends on model wrapper + async def _aget_query_embedding(self, query: str) -> List[float]: + """The asynchronous version of _get_query_embedding.""" + return self._get_query_embedding(query) + + async def _aget_text_embedding(self, text: str) -> List[float]: + """Asynchronously get text embedding.""" + return self._get_text_embedding(text) + + async def _aget_text_embeddings( + self, + texts: List[str], + ) -> List[List[float]]: + """Asynchronously get text embeddings.""" + return self._get_text_embeddings(texts) + + +class LlamaIndexRAG(RAGBase): + """ + This class is a wrapper with the llama index RAG. + """ + + def __init__( + self, + model: Optional[ModelWrapperBase], + emb_model: Union[ModelWrapperBase, BaseEmbedding, None] = None, + config: Optional[dict] = None, + **kwargs: Any, + ) -> None: + """ + RAG component based on llama index. + Args: + model (ModelWrapperBase): + The language model used for final synthesis + emb_model (Optional[ModelWrapperBase]): + The embedding model used for generate embeddings + config (dict): + The additional configuration for llama index rag + """ + super().__init__(model, emb_model, config, **kwargs) + self.retriever = None + self.index = None + self.persist_dir = kwargs.get("persist_dir", "/") + self.emb_model = emb_model + print(self.config) + + # ensure the emb_model is compatible with LlamaIndex + if isinstance(emb_model, ModelWrapperBase): + self.emb_model = _EmbeddingModel(emb_model) + elif isinstance(self.emb_model, BaseEmbedding): + pass + else: + raise TypeError( + f"Embedding model does not support {type(self.emb_model)}.", + ) + + def load_data( + self, + loader: BaseReader, + query: Optional[str] = None, + **kwargs: Any, + ) -> Any: + """ + Accept a loader, loading the desired data (no chunking) + Args: + loader (BaseReader): + object to load data, expected be an instance of class + inheriting from BaseReader in llama index. + query (Optional[str]): + optional, used when the data is in a database. + + Returns: + Any: loaded documents + + Example 1: use simple directory loader to load general documents, + including Markdown, PDFs, Word documents, PowerPoint decks, images, + audio and video. 
+ ``` + load_data_to_chunks( + loader=SimpleDirectoryReader("./data") + ) + ``` + + Example 2: use SQL loader + ``` + load_data_to_chunks( + DatabaseReader( + scheme=os.getenv("DB_SCHEME"), + host=os.getenv("DB_HOST"), + port=os.getenv("DB_PORT"), + user=os.getenv("DB_USER"), + password=os.getenv("DB_PASS"), + dbname=os.getenv("DB_NAME"), + ), + query = "SELECT * FROM users" + ) + ``` + """ + if query is None: + documents = loader.load_data() + else: + documents = loader.load_data(query) + logger.info(f"loaded {len(documents)} documents") + return documents + + def store_and_index( + self, + docs: Any, + vector_store: Union[BasePydanticVectorStore, VectorStore, None] = None, + retriever: Optional[BaseRetriever] = None, + transformations: Optional[list[NodeParser]] = None, + **kwargs: Any, + ) -> Any: + """ + Preprocessing the loaded documents. + Args: + docs (Any): + documents to be processed, usually expected to be in + llama index Documents. + vector_store (Union[BasePydanticVectorStore, VectorStore, None]): + vector store in llama index + retriever (Optional[BaseRetriever]): + optional, specifies the retriever in llama index to be used + transformations (Optional[list[NodeParser]]): + optional, specifies the transformations (operators) to + process documents (e.g., split the documents into smaller + chunks) + + Return: + Any: return the index of the processed document + + In LlamaIndex terms, an Index is a data structure composed + of Document objects, designed to enable querying by an LLM. + For example: + 1) preprocessing documents with + 2) generate embedding, + 3) store the embedding-content to vdb + """ + # build and run preprocessing pipeline + if transformations is None: + transformations = [ + SentenceSplitter( + chunk_size=self.config.get( + "chunk_size", + DEFAULT_CHUNK_SIZE, + ), + chunk_overlap=self.config.get( + "chunk_overlap", + DEFAULT_CHUNK_OVERLAP, + ), + ), + ] + + # adding embedding model as the last step of transformation + # https://docs.llamaindex.ai/en/stable/module_guides/loading/ingestion_pipeline/root.html + transformations.append(self.emb_model) + + if vector_store is not None: + pipeline = IngestionPipeline( + transformations=transformations, + vector_store=vector_store, + ) + _ = pipeline.run(docs) + self.index = VectorStoreIndex.from_vector_store(vector_store) + else: + # No vector store is provide, use simple in memory + pipeline = IngestionPipeline( + transformations=transformations, + ) + nodes = pipeline.run(documents=docs) + self.index = VectorStoreIndex( + nodes=nodes, + embed_model=self.emb_model, + ) + + # set the retriever + if retriever is None: + logger.info( + f'{self.config.get("similarity_top_k", DEFAULT_TOP_K)}', + ) + self.retriever = self.index.as_retriever( + embed_model=self.emb_model, + similarity_top_k=self.config.get( + "similarity_top_k", + DEFAULT_TOP_K, + ), + **kwargs, + ) + else: + self.retriever = retriever + return self.index + + def set_retriever(self, retriever: BaseRetriever) -> None: + """ + Reset the retriever if necessary. + Args: + retriever (BaseRetriever): passing a retriever in llama index. 
+ """ + self.retriever = retriever + + def retrieve(self, query: str, to_list_strs: bool = False) -> list[Any]: + """ + This is a basic retrieve function + Args: + query (str): + query is expected to be a question in string + to_list_strs (book): + whether returns the list of strings; + if False, return NodeWithScore + + Return: + list[Any]: list of str or NodeWithScore + + + More advanced query processing can refer to + https://docs.llamaindex.ai/en/stable/examples/query_transformations/query_transform_cookbook.html + """ + retrieved = self.retriever.retrieve(str(query)) + if to_list_strs: + results = [] + for node in retrieved: + results.append(node.get_text()) + return results + return retrieved diff --git a/examples/conversation_with_RAG_agents/rag/rag.py b/examples/conversation_with_RAG_agents/rag/rag.py new file mode 100644 index 000000000..0de27ca37 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag/rag.py @@ -0,0 +1,118 @@ +# -*- coding: utf-8 -*- +""" +Base class module for retrieval augmented generation (RAG). +To accommodate the RAG process of different packages, +we abstract the RAG process into four stages: +- data loading: loading data into memory for following processing; +- data indexing and storage: document chunking, embedding generation, +and off-load the data into VDB; +- data retrieval: taking a query and return a batch of documents or +document chunks; +- post-processing of the retrieved data: use the retrieved data to +generate an answer. +""" + +from abc import ABC, abstractmethod +from typing import Any, Optional + +from agentscope.models import ModelWrapperBase + +DEFAULT_CHUNK_SIZE = 1024 +DEFAULT_CHUNK_OVERLAP = 20 +DEFAULT_TOP_K = 5 + + +class RAGBase(ABC): + """ + Base class for RAG, CANNOT be instantiated directly + """ + + def __init__( + self, + model: Optional[ModelWrapperBase], + emb_model: Any = None, + config: Optional[dict] = None, + **kwargs: Any, + ) -> None: + # pylint: disable=unused-argument + self.postprocessing_model = model + self.emb_model = emb_model + self.config = config or {} + + @abstractmethod + def load_data( + self, + loader: Any, + query: Any, + **kwargs: Any, + ) -> Any: + """ + Load data (documents) from disk to memory and chunking them + Args: + loader (Any): data loader, depending on the package + query (str): query for getting data from DB + + Returns: + Any: loaded documents + """ + + @abstractmethod + def store_and_index( + self, + docs: Any, + vector_store: Any, + **kwargs: Any, + ) -> Any: + """ + Store and index the documents. + Args: + docs (Any): + documents to be processed, stored and indexed + vector_store (Any): + vector store to store the index and/or documents + + Returns: + Any: can be indices, depending on the RAG package + + preprocessing the loaded documents, for example: + 1) chunking, + 2) generate embedding, + 3) store the embedding-content to vdb + """ + + @abstractmethod + def retrieve(self, query: Any, to_list_strs: bool = False) -> list[Any]: + """ + retrieve list of content from vdb to memory + Args: + query (Any): query to retrieve + to_list_strs (bool): whether return a list of str + + Returns: + return a list with retrieved documents (in strings) + """ + + def post_processing( + self, + retrieved_docs: list[str], + prompt: str, + **kwargs: Any, + ) -> Any: + """ + A default solution for post-processing function, generates answer + based on the retrieved documents. 
+ Args: + retrieved_docs (list[str]): + list of retrieved documents + prompt (str): + prompt for LLM generating answer with the retrieved documents + + Returns: + Any: a synthesized answer from LLM with retrieved documents + + Example: + self.postprocessing_model(prompt.format(retrieved_docs)) + """ + assert self.postprocessing_model + prompt = prompt.format("\n".join(retrieved_docs)) + return self.postprocessing_model(prompt, **kwargs).text diff --git a/examples/conversation_with_RAG_agents/rag_agents.py b/examples/conversation_with_RAG_agents/rag_agents.py new file mode 100644 index 000000000..101b2e305 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag_agents.py @@ -0,0 +1,332 @@ +# -*- coding: utf-8 -*- +""" +This example shows how to build an agent with RAG +with LlamaIndex. + +Notice, this is a Beta version of RAG agent. +""" + +from abc import ABC, abstractmethod +from typing import Optional, Any +import importlib +from loguru import logger + +from rag import RAGBase, LlamaIndexRAG + +from agentscope.agents.agent import AgentBase +from agentscope.message import Msg +from agentscope.models import load_model_by_config_name + + +class RAGAgentBase(AgentBase, ABC): + """ + Base class for RAG agents + """ + + def __init__( + self, + name: str, + sys_prompt: str, + model_config_name: str, + emb_model_config_name: str, + memory_config: Optional[dict] = None, + rag_config: Optional[dict] = None, + ) -> None: + """ + Initialize the RAG base agent + Args: + name (str): + the name for the agent. + sys_prompt (str): + system prompt for the RAG agent. + model_config_name (str): + language model for the agent. + emb_model_config_name (str): + embedding model for the agent. + memory_config (dict): + memory configuration. + rag_config (dict): + config for RAG. It contains most of the + important parameters for RAG modules. If not provided, + the default setting will be used. + Examples can refer to children classes. + """ + super().__init__( + name=name, + sys_prompt=sys_prompt, + model_config_name=model_config_name, + use_memory=True, + memory_config=memory_config, + ) + # setup embedding model used in RAG + self.emb_model = load_model_by_config_name(emb_model_config_name) + + self.rag_config = rag_config or {} + if "log_retrieval" not in self.rag_config: + self.rag_config["log_retrieval"] = True + + # use LlamaIndexAgent OR LangChainAgent + self.rag = self.init_rag() + + @abstractmethod + def init_rag(self) -> RAGBase: + """initialize RAG with configuration""" + + def _prepare_args_from_config( + self, + config: dict, + ) -> Any: + """ + Helper function to build args for the two functions: + rag.load_data(...) and rag.store_and_index(docs, ...) + in RAG classes. + Args: + config (dict): a dictionary containing configurations + + Returns: + Any: an object that is parsed/built to be an element + of input to the function of RAG module. 
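
        For example, a loader config like the one in `agent_config.json`,

            {
                "create_object": true,
                "module": "llama_index.core",
                "class": "SimpleDirectoryReader",
                "init_args": {"input_dir": "../../src/agentscope", "recursive": true}
            }

        is parsed into the object
        `SimpleDirectoryReader(input_dir="../../src/agentscope", recursive=True)`.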
+ """ + if not isinstance(config, dict): + return config + + if "create_object" in config: + # if a term in args is a object, + # recursively create object with args from config + module_name = config.get("module", "") + class_name = config.get("class", "") + init_args = config.get("init_args", {}) + try: + cur_module = importlib.import_module(module_name) + cur_class = getattr(cur_module, class_name) + init_args = self._prepare_args_from_config(init_args) + logger.info( + f"load and build object{cur_module, cur_class, init_args}", + ) + return cur_class(**init_args) + except ImportError as exc_inner: + logger.error( + f"Fail to load class {class_name} " + f"from module {module_name}", + ) + raise ImportError( + f"Fail to load class {class_name} " + f"from module {module_name}", + ) from exc_inner + else: + prepared_args = {} + for key, value in config.items(): + if isinstance(value, list): + prepared_args[key] = [] + for c in value: + prepared_args[key].append( + self._prepare_args_from_config(c), + ) + elif isinstance(value, dict): + prepared_args[key] = self._prepare_args_from_config(value) + else: + prepared_args[key] = value + return prepared_args + + def reply( + self, + x: dict = None, + ) -> dict: + """ + Reply function of the RAG agent. + Processes the input data, + 1) use the input data to retrieve with RAG function; + 2) generates a prompt using the current memory and system + prompt; + 3) invokes the language model to produce a response. The + response is then formatted and added to the dialogue memory. + + Args: + x (`dict`, defaults to `None`): + A dictionary representing the user's input to the agent. This + input is added to the memory if provided. Defaults to + None. + Returns: + A dictionary representing the message generated by the agent in + response to the user's input. + """ + retrieved_docs_to_string = "" + # record the input if needed + if self.memory: + self.memory.add(x) + # in case no input is provided (e.g., in msghub), + # use the memory as query + history = self.memory.get_memory( + recent_n=self.rag_config.get("recent_n_mem", 1), + ) + query = ( + "/n".join( + [msg["content"] for msg in history], + ) + if isinstance(history, list) + else str(history) + ) + elif x is not None: + query = x["content"] + else: + query = "" + + if len(query) > 0: + # when content has information, do retrieval + retrieved_docs = self.rag.retrieve(query, to_list_strs=True) + for content in retrieved_docs: + retrieved_docs_to_string += "\n>>>> " + content + + if self.rag_config["log_retrieval"]: + self.speak("[retrieved]:" + retrieved_docs_to_string) + + # prepare prompt + prompt = self.model.format( + Msg( + name="system", + role="system", + content=self.sys_prompt, + ), + # {"role": "system", "content": retrieved_docs_to_string}, + self.memory.get_memory( + recent_n=self.rag_config.get("recent_n_mem", 1), + ), + Msg( + name="user", + role="user", + content="Context: " + retrieved_docs_to_string, + ), + ) + + # call llm and generate response + response = self.model(prompt).text + msg = Msg(self.name, response) + + # Print/speak the message in this agent's voice + self.speak(msg) + + if self.memory: + # Record the message in memory + self.memory.add(msg) + + return msg + + +class LlamaIndexAgent(RAGAgentBase): + """ + A LlamaIndex agent build on LlamaIndex. 
+ """ + + def __init__( + self, + name: str, + sys_prompt: str, + model_config_name: str, + emb_model_config_name: str = None, + memory_config: Optional[dict] = None, + rag_config: Optional[dict] = None, + ) -> None: + """ + Initialize the RAG LlamaIndexAgent + Args: + name (str): + the name for the agent + sys_prompt (str): + system prompt for the RAG agent + model_config_name (str): + language model for the agent + emb_model_config_name (str): + embedding model for the agent + memory_config (dict): + memory configuration + rag_config (dict): + config for RAG. It contains the parameters for + RAG modules functions: + rag.load_data(...) and rag.store_and_index(docs, ...) + If not provided, the default setting will be used. + An example of the config for retrieving code files + is as following: + + "rag_config": { + "load_data": { + "loader": { + "create_object": true, + "module": "llama_index.core", + "class": "SimpleDirectoryReader", + "init_args": { + "input_dir": "path/to/data", + "recursive": true + ... + } + } + }, + "store_and_index": { + "transformations": [ + { + "create_object": true, + "module": "llama_index.core.node_parser", + "class": "CodeSplitter", + "init_args": { + "language": "python", + "chunk_lines": 100 + } + } + ] + }, + "chunk_size": 2048, + "chunk_overlap": 40, + "similarity_top_k": 10, + "log_retrieval": true, + "recent_n_mem": 1 + } + """ + super().__init__( + name=name, + sys_prompt=sys_prompt, + model_config_name=model_config_name, + emb_model_config_name=emb_model_config_name, + memory_config=memory_config, + rag_config=rag_config, + ) + + def init_rag(self) -> LlamaIndexRAG: + # dynamic loading loader + # init rag related attributes + rag = LlamaIndexRAG( + model=self.model, + emb_model=self.emb_model, + config=self.rag_config, + ) + # load the document to memory + # Feed the AgentScope tutorial documents, so that + # the agent can answer questions related to AgentScope! + if "load_data" in self.rag_config: + load_data_args = self._prepare_args_from_config( + self.rag_config["load_data"], + ) + else: + try: + from llama_index.core import SimpleDirectoryReader + except ImportError as exc_inner: + raise ImportError( + " LlamaIndexAgent requires llama-index to be install." + "Please run `pip install llama-index`", + ) from exc_inner + load_data_args = { + "loader": SimpleDirectoryReader(self.config["data_path"]), + } + logger.info(f"rag.load_data args: {load_data_args}") + docs = rag.load_data(**load_data_args) + + # store and indexing + if "store_and_index" in self.rag_config: + store_and_index_args = self._prepare_args_from_config( + self.rag_config["store_and_index"], + ) + else: + store_and_index_args = {} + + logger.info(f"store_and_index_args args: {store_and_index_args}") + rag.store_and_index(docs, **store_and_index_args) + + return rag diff --git a/examples/conversation_with_RAG_agents/rag_example.py b/examples/conversation_with_RAG_agents/rag_example.py new file mode 100644 index 000000000..1e2b4c6d3 --- /dev/null +++ b/examples/conversation_with_RAG_agents/rag_example.py @@ -0,0 +1,64 @@ +# -*- coding: utf-8 -*- +""" +A simple example for conversation between user and +an agent with RAG capability. 
+""" +import json +import os + +from rag_agents import LlamaIndexAgent + +import agentscope +from agentscope.agents import UserAgent +from agentscope.message import Msg +from agentscope.agents import DialogAgent + + +def main() -> None: + """A RAG multi-agent demo""" + agentscope.init( + model_configs=[ + { + "model_type": "dashscope_chat", + "config_name": "qwen_config", + "model_name": "qwen-max", + "api_key": f"{os.environ.get('DASHSCOPE_API_KEY')}", + }, + { + "model_type": "dashscope_text_embedding", + "config_name": "qwen_emb_config", + "model_name": "text-embedding-v2", + "api_key": f"{os.environ.get('DASHSCOPE_API_KEY')}", + }, + ], + ) + + with open("./agent_config.json", "r", encoding="utf-8") as f: + agent_configs = json.load(f) + tutorial_agent = LlamaIndexAgent(**agent_configs[0]["args"]) + code_explain_agent = LlamaIndexAgent(**agent_configs[1]["args"]) + summarize_agent = DialogAgent(**agent_configs[2]["args"]) + + user_agent = UserAgent() + # start the conversation between user and assistant + while True: + x = user_agent() + x.role = "user" # to enforce dashscope requirement on roles + if len(x["content"]) == 0 or str(x["content"]).startswith("exit"): + break + tutorial_response = tutorial_agent(x) + code_explain = code_explain_agent(x) + msg = Msg( + name="user", + role="user", + content=tutorial_response["content"] + + "\n" + + code_explain["content"] + + "\n" + + x["content"], + ) + summarize_agent(msg) + + +if __name__ == "__main__": + main()