From 65950c1268a8234ddfff1676f4a014c0216b6c73 Mon Sep 17 00:00:00 2001 From: Li Yin Date: Sat, 29 Jun 2024 11:47:45 -0700 Subject: [PATCH 1/2] document clean up --- developer_notes/generator.ipynb | 44 +++++++++++++- developer_notes/generator_note.py | 30 ++++++++++ docs/source/developer_notes/index.rst | 13 ++-- docs/source/index.rst | 85 ++++++++++++++++++--------- lightrag/core/base_data_class.py | 2 +- 5 files changed, 136 insertions(+), 38 deletions(-) create mode 100644 developer_notes/generator_note.py diff --git a/developer_notes/generator.ipynb b/developer_notes/generator.ipynb index 67eb62a6..548ca532 100644 --- a/developer_notes/generator.ipynb +++ b/developer_notes/generator.ipynb @@ -74,10 +74,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GeneratorOutput(data='LightRAG is a light-based Real-time Anomaly Generator, which is a special type of anomaly detection system. It uses a combination of visual and statistical techniques to detect unusual patterns or outliers in a dataset in real-time, often for purposes such as identifying security threats, detecting fraud, or monitoring system performance. Would you like to know more about its applications or how it works?', error=None, usage=None, raw_response='LightRAG is a light-based Real-time Anomaly Generator, which is a special type of anomaly detection system. It uses a combination of visual and statistical techniques to detect unusual patterns or outliers in a dataset in real-time, often for purposes such as identifying security threats, detecting fraud, or monitoring system performance. Would you like to know more about its applications or how it works?')\n" + ] + } + ], + "source": [ + "from lightrag.core import Component, Generator, Prompt\n", + "from lightrag.components.model_client import GroqAPIClient\n", + "from lightrag.utils import setup_env\n", + "\n", + "\n", + "class SimpleQA(Component):\n", + " def __init__(self):\n", + " super().__init__()\n", + " template = r\"\"\"\n", + " You are a helpful assistant.\n", + " \n", + " User: {{input_str}}\n", + " You:\n", + " \"\"\"\n", + " self.generator = Generator(\n", + " model_client=GroqAPIClient(), model_kwargs={\"model\": \"llama3-8b-8192\"}, template=template\n", + " )\n", + "\n", + " def call(self, query):\n", + " return self.generator({\"input_str\": query})\n", + "\n", + " async def acall(self, query):\n", + " return await self.generator.acall({\"input_str\": query})\n", + "\n", + "\n", + "qa = SimpleQA()\n", + "answer = qa(\"What is LightRAG?\")\n", + "\n", + "print(answer)" + ] } ], "metadata": { diff --git a/developer_notes/generator_note.py b/developer_notes/generator_note.py new file mode 100644 index 00000000..5d05f31a --- /dev/null +++ b/developer_notes/generator_note.py @@ -0,0 +1,30 @@ +from lightrag.core import Component, Generator +from lightrag.components.model_client import GroqAPIClient +from lightrag.utils import setup_env # noqa + + +class SimpleQA(Component): + def __init__(self): + super().__init__() + template = r""" + You are a helpful assistant. 
+ + User: {{input_str}} + You: + """ + self.generator = Generator( + model_client=GroqAPIClient(), + model_kwargs={"model": "llama3-8b-8192"}, + template=template, + ) + + def call(self, query): + return self.generator({"input_str": query}) + + async def acall(self, query): + return await self.generator.acall({"input_str": query}) + + +qa = SimpleQA() +answer = qa("What is LightRAG?") +print(qa) diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst index 28cb8c41..6e25d191 100644 --- a/docs/source/developer_notes/index.rst +++ b/docs/source/developer_notes/index.rst @@ -26,17 +26,18 @@ This is our tutorials before you move ahead to build use cases (LLM application .. :width: 200px LightRAG library focus on providing building blocks for developers to **build** and **optimize** the `task pipeline`. -We have clear design phisolophy: +We have clear design phisolophy: :doc:`lightrag_design_philosophy`. -.. toctree:: - :maxdepth: 1 - :caption: Introduction +.. :maxdepth: 1 +.. :hidden: + +.. lightrag_design_philosophy + - lightrag_design_philosophy - llm_intro +.. llm_intro diff --git a/docs/source/index.rst b/docs/source/index.rst index b23ce133..23b8bdb9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,17 +1,13 @@ -.. LightRAG documentation master file, created by - sphinx-quickstart on Thu May 9 15:45:29 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. ======================= -LightRAG Home +Introduction ======================= -LightRAG is the "PyTorch" library for building large langage model(LLM) applications. It is super light, modular and robust like "PyTorch", and offers essential components for `Retriever`-`Agent`-`Generator` (RAG). +LightRAG is the "PyTorch" library for building large langage model(LLM) applications. We help developers on both building and optimimizing `Retriever`-`Agent`-`Generator` (RAG) pipelines. +It is light, modular, and robust. -You have a similar coding experience as PyTorch. Here is a side to side comparison of writing a PyTorch module and a LightRAG component: -.. grid:: 2 +.. grid:: 1 :gutter: 1 .. grid-item-card:: PyTorch @@ -43,25 +39,56 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis .. code-block:: python - from core.component import Component, Generator - from components.model_client import OpenAIClient + from lightrag.core import Component, Generator + from lightrag.components.model_client import GroqAPIClient + from lightrag.utils import setup_env + class SimpleQA(Component): def __init__(self): super().__init__() + template = r""" + You are a helpful assistant. + + User: {{input_str}} + You: + """ self.generator = Generator( - model_client=OpenAIClient(), - model_kwargs={'model': 'gpt-3.5-turbo'} + model_client=GroqAPIClient(), + model_kwargs={"model": "llama3-8b-8192"}, + template=template, ) def call(self, query): - return self.generator.call({'input_str': query}) + return self.generator({"input_str": query}) async def acall(self, query): - return await self.generator.acall({'input_str': query}) + return await self.generator.acall({"input_str": query}) + qa = SimpleQA() - print(qa) + answer = qa("What is LightRAG?") + +LightRAG + +Here is the printed out structure of ``qa``: + +.. code-block:: + + SimpleQA( + (generator): Generator( + model_kwargs={'model': 'llama3-8b-8192'}, + (prompt): Prompt( + template: + You are a helpful assistant. 
+ + User: {{input_str}} + You: + , prompt_variables: ['input_str'] + ) + (model_client): GroqAPIClient() + ) + ) @@ -94,7 +121,6 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis -**LightRAG vs other LLM libraries:** **LightRAG library structures as follows:** @@ -105,21 +131,20 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis * `components` - Components that are built on top of the core directive. Users will install relevant depencides on their own for some components. -**LightRAG documentation is divided into two parts:** - -* **Developer Documentation**: This documentation explains how LightRAG is designed in more depth and is especially useful for developers who want to contribute to LightRAG. -* **User Documentation**: This documentation is for users who want to use LightRAG to build their applications. -We encourage all users to at least skim through the developer documentation. Different from "PyTorch" where a normal user does not have to customize a building module for neural network, -LLM applications have much bigger scope and varies even more to different product environments, so developers customizing components on their own is much more common. +:doc:`get_started/index` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +:doc:`developer_notes/index` +:doc:`apis/index` .. toctree:: :glob: :maxdepth: 1 :caption: New Users + :hidden: get_started/index @@ -130,9 +155,13 @@ LLM applications have much bigger scope and varies even more to different produc .. toctree:: :glob: :maxdepth: 1 - :caption: Tutorials - How each part works developer_notes/index + .. :caption: Tutorials - How each part works + .. :hidden: + + + @@ -142,6 +171,7 @@ LLM applications have much bigger scope and varies even more to different produc .. toctree:: :maxdepth: 1 :caption: Use Cases - How different parts are used to build various LLM applications + :hidden: tutorials/index @@ -149,16 +179,14 @@ LLM applications have much bigger scope and varies even more to different produc .. toctree:: :maxdepth: 1 :caption: API Reference + :hidden: apis/index -.. todo:: - .. toctree:: - :maxdepth: 1 - :caption: Benchmarks + .. :caption: Benchmarks - Manually add documents for the code in benchmarks + .. Manually add documents for the code in benchmarks .. 
:glob: @@ -172,5 +200,6 @@ LLM applications have much bigger scope and varies even more to different produc :glob: :maxdepth: 1 :caption: For Contributors + :hidden: contributor/index diff --git a/lightrag/core/base_data_class.py b/lightrag/core/base_data_class.py index 4d84059b..e434097d 100644 --- a/lightrag/core/base_data_class.py +++ b/lightrag/core/base_data_class.py @@ -179,7 +179,7 @@ class MyOutputs(DataClass): def __post_init__(self): for f in fields(self): - if "desc" not in f.metadata or "desription" not in f.metadata: + if "desc" not in f.metadata and "description" not in f.metadata: warnings.warn( f"Class { self.__class__.__name__} Field {f.name} is missing 'desc' in metadata", UserWarning, From 75ab0ea32d1d49d7383607a9e952db14639791c3 Mon Sep 17 00:00:00 2001 From: Li Yin Date: Sat, 29 Jun 2024 12:09:06 -0700 Subject: [PATCH 2/2] home page and developer notes --- docs/source/developer_notes/index.rst | 26 +++++---- docs/source/index.rst | 81 +++++++++------------------ 2 files changed, 40 insertions(+), 67 deletions(-) diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst index 6e25d191..0029d1da 100644 --- a/docs/source/developer_notes/index.rst +++ b/docs/source/developer_notes/index.rst @@ -9,9 +9,11 @@ Developer Notes Learn LightRAG design phisolophy and the `why` and `how-to` (customize and integrate) behind each core part within the LightRAG library. This is our tutorials before you move ahead to build use cases (LLM applications) end to end. -.. note:: +.. raw:: - You can read interchangably between :ref:`Use Cases `. + .. note:: + + You can read interchangably between :ref:`Use Cases `. @@ -43,10 +45,9 @@ We have clear design phisolophy: :doc:`lightrag_design_philosophy`. Building -============================= - +------------------- Base classes ---------------- +~~~~~~~~~~~~~~~~~~~~~~ Code path: ``lightrag.core``. .. list-table:: @@ -74,9 +75,10 @@ Code path: ``lightrag.core``. base_data_class RAG Essentials -------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RAG components -~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^ + Code path: ``lightrag.core``. For abstract classes: @@ -101,8 +103,8 @@ Code path: ``lightrag.core``. For abstract classes: * - :doc:`retriever` - The base class for all retrievers who in particular retrieve relevant documents from a given database to add **context** to the generator. -Data, Processing, and storage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Data Pipeline and Storage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Data Processing: including transformer, pipeline, and storage. Code path: ``lightrag.components.data_process``, ``lightrag.core.db``, and ``lightrag.database``. Components work on a sequence of ``Document`` and return a sequence of ``Document``. @@ -140,7 +142,7 @@ Components work on a sequence of ``Document`` and return a sequence of ``Documen Agent Essentials ------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Agent in ``components.agent`` is LLM great with reasoning, planning, and using tools to interact and accomplish tasks. .. list-table:: @@ -187,7 +189,7 @@ Agent in ``components.agent`` is LLM great with reasoning, planning, and using t Optimizing -============================= +------------------- Datasets and Evaulation @@ -214,7 +216,7 @@ Optimizer & Trainer Logging & Tracing & Configurations -============================= +------------------------------------ Code path: ``lightrag.utils``. .. 
list-table:: diff --git a/docs/source/index.rst b/docs/source/index.rst index 23b8bdb9..8c89ba0e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -69,7 +69,19 @@ It is light, modular, and robust. qa = SimpleQA() answer = qa("What is LightRAG?") -LightRAG + +Why LightRAG? + + +Clarity and Simplicity +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity: + +- We maintain no more than two levels of subclasses. +- Each core abstract class is designed to be robust and flexible. +- We use 10X less code than other libraries to achieve 10X more robustness and flexibility. Here is the printed out structure of ``qa``: @@ -90,63 +102,28 @@ Here is the printed out structure of ``qa``: ) ) +Control and Transparency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. In default: +- LightRAG simplifies what developers need to send to LLM proprietary APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers. +- LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format -**Why LightRAG?** - - -1. **Clarity and Simplicity** - - We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity: - - - We maintain no more than two levels of subclasses. - - Each core abstract class is designed to be robust and flexible. - - We use 10X less code than other libraries to achieve 10X more robustness and flexibility. - - -2. **Control and Transparency** - - Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. In default: - - - LightRAG simplifies what developers need to send to LLM proprietary APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers. - - LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format. - -3. 
**Suitted for Both Researchers and Production Engineers** - - On top of the easiness to use, we in particular optimize the configurability of components for researchers to build their solutions and to benchmark existing solutions. - Like how PyTorch has united both researchers and production teams, it enables smooth transition from research to production. - With researchers building on LightRAG, production engineers can easily take over the method and test and iterate on their production data. - Researchers will want their code to be adapted into more products too. - - - - - -**LightRAG library structures as follows:** - -#TODO: One diagram to make people understand lightrag faster - -* `core` - Base abstractions, core functions, and core components like `Generator` and `Embedder` to support more advanced components. -* `components` - Components that are built on top of the core directive. Users will install relevant depencides on their own for some components. - - - - -:doc:`get_started/index` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is the future of LLM applications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -:doc:`developer_notes/index` +On top of the easiness to use, we in particular optimize the configurability of components for researchers to build their solutions and to benchmark existing solutions. +Like how PyTorch has united both researchers and production teams, it enables smooth transition from research to production. +With researchers building on LightRAG, production engineers can easily take over the method and test and iterate on their production data. +Researchers will want their code to be adapted into more products too. -:doc:`apis/index` .. toctree:: :glob: :maxdepth: 1 - :caption: New Users :hidden: - get_started/index @@ -155,19 +132,13 @@ Here is the printed out structure of ``qa``: .. toctree:: :glob: :maxdepth: 1 + :hidden: developer_notes/index .. :caption: Tutorials - How each part works .. :hidden: - - - - - - - .. toctree:: :maxdepth: 1 :caption: Use Cases - How different parts are used to build various LLM applications @@ -177,8 +148,8 @@ Here is the printed out structure of ``qa``: .. toctree:: + :glob: :maxdepth: 1 - :caption: API Reference :hidden: apis/index
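
The ``SimpleQA`` component added in ``developer_notes/generator_note.py`` defines both a synchronous ``call`` and an asynchronous ``acall``, but only the synchronous path is exercised at the bottom of that file. A minimal sketch of the async path, assuming the same class, a ``GROQ_API_CLIENT``-compatible ``GROQ_API_KEY`` in the environment, and an illustrative import path (running from ``developer_notes/``):

.. code-block:: python

    import asyncio

    # Illustrative import: assumes this snippet runs from developer_notes/
    from generator_note import SimpleQA


    async def main():
        qa = SimpleQA()
        # acall awaits Generator.acall with the same prompt variable as the sync path
        answer = await qa.acall("What is LightRAG?")
        print(answer)


    asyncio.run(main())

Either path renders the same template; ``acall`` simply awaits ``Generator.acall`` instead of invoking the generator directly.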
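
The ``base_data_class.py`` hunk tightens the metadata check in the ``MyOutputs`` example: with ``and`` and the corrected ``description`` spelling, the warning fires only when a field carries neither a ``desc`` nor a ``description`` key, whereas the old ``or`` against the misspelled key warned on nearly every field. A minimal sketch of a field definition that passes the corrected check, assuming ``DataClass`` is importable from the patched module:

.. code-block:: python

    import warnings
    from dataclasses import dataclass, field, fields

    # Assumed import path, taken from the patched file lightrag/core/base_data_class.py
    from lightrag.core.base_data_class import DataClass


    @dataclass
    class MyOutputs(DataClass):
        # Each field carries a "desc" key, so the corrected check below stays silent
        age: int = field(default=0, metadata={"desc": "The age of the person"})
        name: str = field(default="", metadata={"desc": "The name of the person"})

        def __post_init__(self):
            for f in fields(self):
                # `and` (not `or`) means the warning fires only when both keys are absent
                if "desc" not in f.metadata and "description" not in f.metadata:
                    warnings.warn(
                        f"Class {self.__class__.__name__} Field {f.name} is missing 'desc' in metadata",
                        UserWarning,
                    )


    MyOutputs()  # instantiating runs __post_init__; no warning is emitted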