From 65950c1268a8234ddfff1676f4a014c0216b6c73 Mon Sep 17 00:00:00 2001
From: Li Yin
Date: Sat, 29 Jun 2024 11:47:45 -0700
Subject: [PATCH 1/2] document clean up
---
developer_notes/generator.ipynb | 44 +++++++++++++-
developer_notes/generator_note.py | 30 ++++++++++
docs/source/developer_notes/index.rst | 13 ++--
docs/source/index.rst | 85 ++++++++++++++++++---------
lightrag/core/base_data_class.py | 2 +-
5 files changed, 136 insertions(+), 38 deletions(-)
create mode 100644 developer_notes/generator_note.py
diff --git a/developer_notes/generator.ipynb b/developer_notes/generator.ipynb
index 67eb62a6..548ca532 100644
--- a/developer_notes/generator.ipynb
+++ b/developer_notes/generator.ipynb
@@ -74,10 +74,48 @@
},
{
"cell_type": "code",
- "execution_count": null,
+ "execution_count": 3,
"metadata": {},
- "outputs": [],
- "source": []
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "GeneratorOutput(data='LightRAG is a light-based Real-time Anomaly Generator, which is a special type of anomaly detection system. It uses a combination of visual and statistical techniques to detect unusual patterns or outliers in a dataset in real-time, often for purposes such as identifying security threats, detecting fraud, or monitoring system performance. Would you like to know more about its applications or how it works?', error=None, usage=None, raw_response='LightRAG is a light-based Real-time Anomaly Generator, which is a special type of anomaly detection system. It uses a combination of visual and statistical techniques to detect unusual patterns or outliers in a dataset in real-time, often for purposes such as identifying security threats, detecting fraud, or monitoring system performance. Would you like to know more about its applications or how it works?')\n"
+ ]
+ }
+ ],
+ "source": [
+ "from lightrag.core import Component, Generator, Prompt\n",
+ "from lightrag.components.model_client import GroqAPIClient\n",
+ "from lightrag.utils import setup_env\n",
+ "\n",
+ "\n",
+ "class SimpleQA(Component):\n",
+ " def __init__(self):\n",
+ " super().__init__()\n",
+ " template = r\"\"\"\n",
+ " You are a helpful assistant.\n",
+ " \n",
+ " User: {{input_str}}\n",
+ " You:\n",
+ " \"\"\"\n",
+ " self.generator = Generator(\n",
+ " model_client=GroqAPIClient(), model_kwargs={\"model\": \"llama3-8b-8192\"}, template=template\n",
+ " )\n",
+ "\n",
+ " def call(self, query):\n",
+ " return self.generator({\"input_str\": query})\n",
+ "\n",
+ " async def acall(self, query):\n",
+ " return await self.generator.acall({\"input_str\": query})\n",
+ "\n",
+ "\n",
+ "qa = SimpleQA()\n",
+ "answer = qa(\"What is LightRAG?\")\n",
+ "\n",
+ "print(answer)"
+ ]
}
],
"metadata": {
diff --git a/developer_notes/generator_note.py b/developer_notes/generator_note.py
new file mode 100644
index 00000000..5d05f31a
--- /dev/null
+++ b/developer_notes/generator_note.py
@@ -0,0 +1,30 @@
+from lightrag.core import Component, Generator
+from lightrag.components.model_client import GroqAPIClient
+from lightrag.utils import setup_env # noqa
+
+
+class SimpleQA(Component):
+ def __init__(self):
+ super().__init__()
+ template = r"""
+ You are a helpful assistant.
+
+ User: {{input_str}}
+ You:
+ """
+ self.generator = Generator(
+ model_client=GroqAPIClient(),
+ model_kwargs={"model": "llama3-8b-8192"},
+ template=template,
+ )
+
+ def call(self, query):
+ return self.generator({"input_str": query})
+
+ async def acall(self, query):
+ return await self.generator.acall({"input_str": query})
+
+
+qa = SimpleQA()
+answer = qa("What is LightRAG?")
+print(qa)
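+
+# Illustrative usage notes (assumption: plain asyncio, no extra setup required):
+# - `print(qa)` prints the component structure of `qa` (the nested Generator with
+#   its Prompt and GroqAPIClient); use `print(answer)` to print the model's response.
+# - The async variant can be driven with asyncio, e.g.:
+#     import asyncio
+#     answer = asyncio.run(qa.acall("What is LightRAG?"))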
diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst
index 28cb8c41..6e25d191 100644
--- a/docs/source/developer_notes/index.rst
+++ b/docs/source/developer_notes/index.rst
@@ -26,17 +26,18 @@ This is our tutorials before you move ahead to build use cases (LLM application
.. :width: 200px
The LightRAG library focuses on providing building blocks for developers to **build** and **optimize** the `task pipeline`.
-We have clear design phisolophy:
+We have a clear design philosophy: :doc:`lightrag_design_philosophy`.
-.. toctree::
- :maxdepth: 1
- :caption: Introduction
+.. :maxdepth: 1
+.. :hidden:
+
+.. lightrag_design_philosophy
+
- lightrag_design_philosophy
- llm_intro
+.. llm_intro
diff --git a/docs/source/index.rst b/docs/source/index.rst
index b23ce133..23b8bdb9 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -1,17 +1,13 @@
-.. LightRAG documentation master file, created by
- sphinx-quickstart on Thu May 9 15:45:29 2024.
- You can adapt this file completely to your liking, but it should at least
- contain the root `toctree` directive.
=======================
-LightRAG Home
+Introduction
=======================
-LightRAG is the "PyTorch" library for building large langage model(LLM) applications. It is super light, modular and robust like "PyTorch", and offers essential components for `Retriever`-`Agent`-`Generator` (RAG).
+LightRAG is the "PyTorch" library for building large langage model(LLM) applications. We help developers on both building and optimimizing `Retriever`-`Agent`-`Generator` (RAG) pipelines.
+It is light, modular, and robust.
-You have a similar coding experience as PyTorch. Here is a side to side comparison of writing a PyTorch module and a LightRAG component:
-.. grid:: 2
+.. grid:: 1
:gutter: 1
.. grid-item-card:: PyTorch
@@ -43,25 +39,56 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis
.. code-block:: python
- from core.component import Component, Generator
- from components.model_client import OpenAIClient
+ from lightrag.core import Component, Generator
+ from lightrag.components.model_client import GroqAPIClient
+ from lightrag.utils import setup_env
+
class SimpleQA(Component):
def __init__(self):
super().__init__()
+ template = r"""
+ You are a helpful assistant.
+
+ User: {{input_str}}
+ You:
+ """
self.generator = Generator(
- model_client=OpenAIClient(),
- model_kwargs={'model': 'gpt-3.5-turbo'}
+ model_client=GroqAPIClient(),
+ model_kwargs={"model": "llama3-8b-8192"},
+ template=template,
)
def call(self, query):
- return self.generator.call({'input_str': query})
+ return self.generator({"input_str": query})
async def acall(self, query):
- return await self.generator.acall({'input_str': query})
+ return await self.generator.acall({"input_str": query})
+
qa = SimpleQA()
- print(qa)
+ answer = qa("What is LightRAG?")
+
+LightRAG
+
+Here is the printed out structure of ``qa``:
+
+.. code-block::
+
+ SimpleQA(
+ (generator): Generator(
+ model_kwargs={'model': 'llama3-8b-8192'},
+ (prompt): Prompt(
+ template:
+ You are a helpful assistant.
+
+ User: {{input_str}}
+ You:
+ , prompt_variables: ['input_str']
+ )
+ (model_client): GroqAPIClient()
+ )
+ )
@@ -94,7 +121,6 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis
-**LightRAG vs other LLM libraries:**
**LightRAG library structures as follows:**
@@ -105,21 +131,20 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis
* `components` - Components that are built on top of the core directive. Users will install relevant depencides on their own for some components.
-**LightRAG documentation is divided into two parts:**
-
-* **Developer Documentation**: This documentation explains how LightRAG is designed in more depth and is especially useful for developers who want to contribute to LightRAG.
-* **User Documentation**: This documentation is for users who want to use LightRAG to build their applications.
-We encourage all users to at least skim through the developer documentation. Different from "PyTorch" where a normal user does not have to customize a building module for neural network,
-LLM applications have much bigger scope and varies even more to different product environments, so developers customizing components on their own is much more common.
+:doc:`get_started/index`
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+:doc:`developer_notes/index`
+:doc:`apis/index`
.. toctree::
:glob:
:maxdepth: 1
:caption: New Users
+ :hidden:
get_started/index
@@ -130,9 +155,13 @@ LLM applications have much bigger scope and varies even more to different produc
.. toctree::
:glob:
:maxdepth: 1
- :caption: Tutorials - How each part works
developer_notes/index
+ .. :caption: Tutorials - How each part works
+ .. :hidden:
+
+
+
@@ -142,6 +171,7 @@ LLM applications have much bigger scope and varies even more to different produc
.. toctree::
:maxdepth: 1
:caption: Use Cases - How different parts are used to build various LLM applications
+ :hidden:
tutorials/index
@@ -149,16 +179,14 @@ LLM applications have much bigger scope and varies even more to different produc
.. toctree::
:maxdepth: 1
:caption: API Reference
+ :hidden:
apis/index
-.. todo::
- .. toctree::
- :maxdepth: 1
- :caption: Benchmarks
+ .. :caption: Benchmarks
- Manually add documents for the code in benchmarks
+ .. Manually add documents for the code in benchmarks
.. :glob:
@@ -172,5 +200,6 @@ LLM applications have much bigger scope and varies even more to different produc
:glob:
:maxdepth: 1
:caption: For Contributors
+ :hidden:
contributor/index
diff --git a/lightrag/core/base_data_class.py b/lightrag/core/base_data_class.py
index 4d84059b..e434097d 100644
--- a/lightrag/core/base_data_class.py
+++ b/lightrag/core/base_data_class.py
@@ -179,7 +179,7 @@ class MyOutputs(DataClass):
def __post_init__(self):
for f in fields(self):
- if "desc" not in f.metadata or "desription" not in f.metadata:
+ if "desc" not in f.metadata and "description" not in f.metadata:
warnings.warn(
f"Class { self.__class__.__name__} Field {f.name} is missing 'desc' in metadata",
UserWarning,
From 75ab0ea32d1d49d7383607a9e952db14639791c3 Mon Sep 17 00:00:00 2001
From: Li Yin
Date: Sat, 29 Jun 2024 12:09:06 -0700
Subject: [PATCH 2/2] home page and developer notes
---
docs/source/developer_notes/index.rst | 26 +++++----
docs/source/index.rst | 81 +++++++++------------------
2 files changed, 40 insertions(+), 67 deletions(-)
diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst
index 6e25d191..0029d1da 100644
--- a/docs/source/developer_notes/index.rst
+++ b/docs/source/developer_notes/index.rst
@@ -9,9 +9,11 @@ Developer Notes
Learn the LightRAG design philosophy and the `why` and `how-to` (customize and integrate) behind each core part within the LightRAG library.
These are our tutorials to read before you move ahead to build use cases (LLM applications) end to end.
-.. note::
+.. raw::
- You can read interchangably between :ref:`Use Cases `.
+ .. note::
+
+      You can read interchangeably between :ref:`Use Cases `.
@@ -43,10 +45,9 @@ We have a clear design philosophy: :doc:`lightrag_design_philosophy`.
Building
-=============================
-
+-------------------
Base classes
----------------
+~~~~~~~~~~~~~~~~~~~~~~
Code path: ``lightrag.core``.
.. list-table::
@@ -74,9 +75,10 @@ Code path: ``lightrag.core``.
base_data_class
RAG Essentials
--------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
RAG components
-~~~~~~~~~~~~~~~~~~~~~~
+^^^^^^^^^^^^^^^^^^^
+
Code path: ``lightrag.core``. For abstract classes:
@@ -101,8 +103,8 @@ Code path: ``lightrag.core``. For abstract classes:
* - :doc:`retriever`
- The base class for all retrievers who in particular retrieve relevant documents from a given database to add **context** to the generator.
-Data, Processing, and storage
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Data Pipeline and Storage
+^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^
Data Processing: including transformer, pipeline, and storage. Code path: ``lightrag.components.data_process``, ``lightrag.core.db``, and ``lightrag.database``.
Components work on a sequence of ``Document`` and return a sequence of ``Document``.
@@ -140,7 +142,7 @@ Components work on a sequence of ``Document`` and return a sequence of ``Documen
Agent Essentials
------------------------------
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
Agent in ``components.agent`` is an LLM that is great at reasoning, planning, and using tools to interact and accomplish tasks.
.. list-table::
@@ -187,7 +189,7 @@ Agent in ``components.agent`` is LLM great with reasoning, planning, and using t
Optimizing
-=============================
+-------------------
Datasets and Evaluation
@@ -214,7 +216,7 @@ Optimizer & Trainer
Logging & Tracing & Configurations
-=============================
+------------------------------------
Code path: ``lightrag.utils``.
.. list-table::
diff --git a/docs/source/index.rst b/docs/source/index.rst
index 23b8bdb9..8c89ba0e 100644
--- a/docs/source/index.rst
+++ b/docs/source/index.rst
@@ -69,7 +69,19 @@ It is light, modular, and robust.
qa = SimpleQA()
answer = qa("What is LightRAG?")
-LightRAG
+
+Why LightRAG?
+-------------------
+
+Clarity and Simplicity
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+
+We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity:
+
+- We maintain no more than two levels of subclasses.
+- Each core abstract class is designed to be robust and flexible.
+- We use 10X less code than other libraries to achieve 10X more robustness and flexibility.
Here is the printed out structure of ``qa``:
@@ -90,63 +102,28 @@ Here is the printed out structure of ``qa``:
)
)
+Control and Transparency
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. By default:
+
+- LightRAG simplifies what developers need to send to proprietary LLM APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers.
+- LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format.
-**Why LightRAG?**
-
-
-1. **Clarity and Simplicity**
-
- We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity:
-
- - We maintain no more than two levels of subclasses.
- - Each core abstract class is designed to be robust and flexible.
- - We use 10X less code than other libraries to achieve 10X more robustness and flexibility.
-
-
-2. **Control and Transparency**
-
- Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. In default:
-
- - LightRAG simplifies what developers need to send to LLM proprietary APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers.
- - LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format.
-
-3. **Suitted for Both Researchers and Production Engineers**
-
- On top of the easiness to use, we in particular optimize the configurability of components for researchers to build their solutions and to benchmark existing solutions.
- Like how PyTorch has united both researchers and production teams, it enables smooth transition from research to production.
- With researchers building on LightRAG, production engineers can easily take over the method and test and iterate on their production data.
- Researchers will want their code to be adapted into more products too.
-
-
-
-
-
-**LightRAG library structures as follows:**
-
-#TODO: One diagram to make people understand lightrag faster
-
-* `core` - Base abstractions, core functions, and core components like `Generator` and `Embedder` to support more advanced components.
-* `components` - Components that are built on top of the core directive. Users will install relevant depencides on their own for some components.
-
-
-
-
-:doc:`get_started/index`
-~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+It is the future of LLM applications
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
-:doc:`developer_notes/index`
+On top of being easy to use, we particularly optimize the configurability of components so researchers can build their own solutions and benchmark existing ones.
+Just as PyTorch has united researchers and production teams, LightRAG enables a smooth transition from research to production.
+With researchers building on LightRAG, production engineers can easily take over their methods and test and iterate on production data.
+Researchers will want their code to be adapted into more products too.
-:doc:`apis/index`
.. toctree::
:glob:
:maxdepth: 1
- :caption: New Users
:hidden:
-
get_started/index
@@ -155,19 +132,13 @@ Here is the printed out structure of ``qa``:
.. toctree::
:glob:
:maxdepth: 1
+ :hidden:
developer_notes/index
.. :caption: Tutorials - How each part works
.. :hidden:
-
-
-
-
-
-
-
.. toctree::
:maxdepth: 1
:caption: Use Cases - How different parts are used to build various LLM applications
@@ -177,8 +148,8 @@ Here is the printed out structure of ``qa``:
.. toctree::
+ :glob:
:maxdepth: 1
- :caption: API Reference
:hidden:
apis/index