From 65950c1268a8234ddfff1676f4a014c0216b6c73 Mon Sep 17 00:00:00 2001 From: Li Yin Date: Sat, 29 Jun 2024 11:47:45 -0700 Subject: [PATCH 1/2] document clean up --- developer_notes/generator.ipynb | 44 +++++++++++++- developer_notes/generator_note.py | 30 ++++++++++ docs/source/developer_notes/index.rst | 13 ++-- docs/source/index.rst | 85 ++++++++++++++++++--------- lightrag/core/base_data_class.py | 2 +- 5 files changed, 136 insertions(+), 38 deletions(-) create mode 100644 developer_notes/generator_note.py diff --git a/developer_notes/generator.ipynb b/developer_notes/generator.ipynb index 67eb62a6..548ca532 100644 --- a/developer_notes/generator.ipynb +++ b/developer_notes/generator.ipynb @@ -74,10 +74,48 @@ }, { "cell_type": "code", - "execution_count": null, + "execution_count": 3, "metadata": {}, - "outputs": [], - "source": [] + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "GeneratorOutput(data='LightRAG is a light-based Real-time Anomaly Generator, which is a special type of anomaly detection system. It uses a combination of visual and statistical techniques to detect unusual patterns or outliers in a dataset in real-time, often for purposes such as identifying security threats, detecting fraud, or monitoring system performance. Would you like to know more about its applications or how it works?', error=None, usage=None, raw_response='LightRAG is a light-based Real-time Anomaly Generator, which is a special type of anomaly detection system. It uses a combination of visual and statistical techniques to detect unusual patterns or outliers in a dataset in real-time, often for purposes such as identifying security threats, detecting fraud, or monitoring system performance. Would you like to know more about its applications or how it works?')\n" + ] + } + ], + "source": [ + "from lightrag.core import Component, Generator, Prompt\n", + "from lightrag.components.model_client import GroqAPIClient\n", + "from lightrag.utils import setup_env\n", + "\n", + "\n", + "class SimpleQA(Component):\n", + " def __init__(self):\n", + " super().__init__()\n", + " template = r\"\"\"\n", + " You are a helpful assistant.\n", + " \n", + " User: {{input_str}}\n", + " You:\n", + " \"\"\"\n", + " self.generator = Generator(\n", + " model_client=GroqAPIClient(), model_kwargs={\"model\": \"llama3-8b-8192\"}, template=template\n", + " )\n", + "\n", + " def call(self, query):\n", + " return self.generator({\"input_str\": query})\n", + "\n", + " async def acall(self, query):\n", + " return await self.generator.acall({\"input_str\": query})\n", + "\n", + "\n", + "qa = SimpleQA()\n", + "answer = qa(\"What is LightRAG?\")\n", + "\n", + "print(answer)" + ] } ], "metadata": { diff --git a/developer_notes/generator_note.py b/developer_notes/generator_note.py new file mode 100644 index 00000000..5d05f31a --- /dev/null +++ b/developer_notes/generator_note.py @@ -0,0 +1,30 @@ +from lightrag.core import Component, Generator +from lightrag.components.model_client import GroqAPIClient +from lightrag.utils import setup_env # noqa + + +class SimpleQA(Component): + def __init__(self): + super().__init__() + template = r""" + You are a helpful assistant. 
+ + User: {{input_str}} + You: + """ + self.generator = Generator( + model_client=GroqAPIClient(), + model_kwargs={"model": "llama3-8b-8192"}, + template=template, + ) + + def call(self, query): + return self.generator({"input_str": query}) + + async def acall(self, query): + return await self.generator.acall({"input_str": query}) + + +qa = SimpleQA() +answer = qa("What is LightRAG?") +print(qa) diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst index 28cb8c41..6e25d191 100644 --- a/docs/source/developer_notes/index.rst +++ b/docs/source/developer_notes/index.rst @@ -26,17 +26,18 @@ This is our tutorials before you move ahead to build use cases (LLM application .. :width: 200px LightRAG library focus on providing building blocks for developers to **build** and **optimize** the `task pipeline`. -We have clear design phisolophy: +We have clear design phisolophy: :doc:`lightrag_design_philosophy`. -.. toctree:: - :maxdepth: 1 - :caption: Introduction +.. :maxdepth: 1 +.. :hidden: + +.. lightrag_design_philosophy + - lightrag_design_philosophy - llm_intro +.. llm_intro diff --git a/docs/source/index.rst b/docs/source/index.rst index b23ce133..23b8bdb9 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -1,17 +1,13 @@ -.. LightRAG documentation master file, created by - sphinx-quickstart on Thu May 9 15:45:29 2024. - You can adapt this file completely to your liking, but it should at least - contain the root `toctree` directive. ======================= -LightRAG Home +Introduction ======================= -LightRAG is the "PyTorch" library for building large langage model(LLM) applications. It is super light, modular and robust like "PyTorch", and offers essential components for `Retriever`-`Agent`-`Generator` (RAG). +LightRAG is the "PyTorch" library for building large langage model(LLM) applications. We help developers on both building and optimimizing `Retriever`-`Agent`-`Generator` (RAG) pipelines. +It is light, modular, and robust. -You have a similar coding experience as PyTorch. Here is a side to side comparison of writing a PyTorch module and a LightRAG component: -.. grid:: 2 +.. grid:: 1 :gutter: 1 .. grid-item-card:: PyTorch @@ -43,25 +39,56 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis .. code-block:: python - from core.component import Component, Generator - from components.model_client import OpenAIClient + from lightrag.core import Component, Generator + from lightrag.components.model_client import GroqAPIClient + from lightrag.utils import setup_env + class SimpleQA(Component): def __init__(self): super().__init__() + template = r""" + You are a helpful assistant. + + User: {{input_str}} + You: + """ self.generator = Generator( - model_client=OpenAIClient(), - model_kwargs={'model': 'gpt-3.5-turbo'} + model_client=GroqAPIClient(), + model_kwargs={"model": "llama3-8b-8192"}, + template=template, ) def call(self, query): - return self.generator.call({'input_str': query}) + return self.generator({"input_str": query}) async def acall(self, query): - return await self.generator.acall({'input_str': query}) + return await self.generator.acall({"input_str": query}) + qa = SimpleQA() - print(qa) + answer = qa("What is LightRAG?") + +LightRAG + +Here is the printed out structure of ``qa``: + +.. code-block:: + + SimpleQA( + (generator): Generator( + model_kwargs={'model': 'llama3-8b-8192'}, + (prompt): Prompt( + template: + You are a helpful assistant. 
+ + User: {{input_str}} + You: + , prompt_variables: ['input_str'] + ) + (model_client): GroqAPIClient() + ) + ) @@ -94,7 +121,6 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis -**LightRAG vs other LLM libraries:** **LightRAG library structures as follows:** @@ -105,21 +131,20 @@ You have a similar coding experience as PyTorch. Here is a side to side comparis * `components` - Components that are built on top of the core directive. Users will install relevant depencides on their own for some components. -**LightRAG documentation is divided into two parts:** - -* **Developer Documentation**: This documentation explains how LightRAG is designed in more depth and is especially useful for developers who want to contribute to LightRAG. -* **User Documentation**: This documentation is for users who want to use LightRAG to build their applications. -We encourage all users to at least skim through the developer documentation. Different from "PyTorch" where a normal user does not have to customize a building module for neural network, -LLM applications have much bigger scope and varies even more to different product environments, so developers customizing components on their own is much more common. +:doc:`get_started/index` +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +:doc:`developer_notes/index` +:doc:`apis/index` .. toctree:: :glob: :maxdepth: 1 :caption: New Users + :hidden: get_started/index @@ -130,9 +155,13 @@ LLM applications have much bigger scope and varies even more to different produc .. toctree:: :glob: :maxdepth: 1 - :caption: Tutorials - How each part works developer_notes/index + .. :caption: Tutorials - How each part works + .. :hidden: + + + @@ -142,6 +171,7 @@ LLM applications have much bigger scope and varies even more to different produc .. toctree:: :maxdepth: 1 :caption: Use Cases - How different parts are used to build various LLM applications + :hidden: tutorials/index @@ -149,16 +179,14 @@ LLM applications have much bigger scope and varies even more to different produc .. toctree:: :maxdepth: 1 :caption: API Reference + :hidden: apis/index -.. todo:: - .. toctree:: - :maxdepth: 1 - :caption: Benchmarks + .. :caption: Benchmarks - Manually add documents for the code in benchmarks + .. Manually add documents for the code in benchmarks .. 
:glob: @@ -172,5 +200,6 @@ LLM applications have much bigger scope and varies even more to different produc :glob: :maxdepth: 1 :caption: For Contributors + :hidden: contributor/index diff --git a/lightrag/core/base_data_class.py b/lightrag/core/base_data_class.py index 4d84059b..e434097d 100644 --- a/lightrag/core/base_data_class.py +++ b/lightrag/core/base_data_class.py @@ -179,7 +179,7 @@ class MyOutputs(DataClass): def __post_init__(self): for f in fields(self): - if "desc" not in f.metadata or "desription" not in f.metadata: + if "desc" not in f.metadata and "description" not in f.metadata: warnings.warn( f"Class { self.__class__.__name__} Field {f.name} is missing 'desc' in metadata", UserWarning, From 75ab0ea32d1d49d7383607a9e952db14639791c3 Mon Sep 17 00:00:00 2001 From: Li Yin Date: Sat, 29 Jun 2024 12:09:06 -0700 Subject: [PATCH 2/2] home page and developer notes --- docs/source/developer_notes/index.rst | 26 +++++---- docs/source/index.rst | 81 +++++++++------------------ 2 files changed, 40 insertions(+), 67 deletions(-) diff --git a/docs/source/developer_notes/index.rst b/docs/source/developer_notes/index.rst index 6e25d191..0029d1da 100644 --- a/docs/source/developer_notes/index.rst +++ b/docs/source/developer_notes/index.rst @@ -9,9 +9,11 @@ Developer Notes Learn LightRAG design phisolophy and the `why` and `how-to` (customize and integrate) behind each core part within the LightRAG library. This is our tutorials before you move ahead to build use cases (LLM applications) end to end. -.. note:: +.. raw:: - You can read interchangably between :ref:`Use Cases `. + .. note:: + + You can read interchangably between :ref:`Use Cases `. @@ -43,10 +45,9 @@ We have clear design phisolophy: :doc:`lightrag_design_philosophy`. Building -============================= - +------------------- Base classes ---------------- +~~~~~~~~~~~~~~~~~~~~~~ Code path: ``lightrag.core``. .. list-table:: @@ -74,9 +75,10 @@ Code path: ``lightrag.core``. base_data_class RAG Essentials -------------------- +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ RAG components -~~~~~~~~~~~~~~~~~~~~~~ +^^^^^^^^^^^^^^^^^^^ + Code path: ``lightrag.core``. For abstract classes: @@ -101,8 +103,8 @@ Code path: ``lightrag.core``. For abstract classes: * - :doc:`retriever` - The base class for all retrievers who in particular retrieve relevant documents from a given database to add **context** to the generator. -Data, Processing, and storage -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Data Pipeline and Storage +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^ Data Processing: including transformer, pipeline, and storage. Code path: ``lightrag.components.data_process``, ``lightrag.core.db``, and ``lightrag.database``. Components work on a sequence of ``Document`` and return a sequence of ``Document``. @@ -140,7 +142,7 @@ Components work on a sequence of ``Document`` and return a sequence of ``Documen Agent Essentials ------------------------------ +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Agent in ``components.agent`` is LLM great with reasoning, planning, and using tools to interact and accomplish tasks. .. list-table:: @@ -187,7 +189,7 @@ Agent in ``components.agent`` is LLM great with reasoning, planning, and using t Optimizing -============================= +------------------- Datasets and Evaulation @@ -214,7 +216,7 @@ Optimizer & Trainer Logging & Tracing & Configurations -============================= +------------------------------------ Code path: ``lightrag.utils``. .. 
list-table:: diff --git a/docs/source/index.rst b/docs/source/index.rst index 23b8bdb9..8c89ba0e 100644 --- a/docs/source/index.rst +++ b/docs/source/index.rst @@ -69,7 +69,19 @@ It is light, modular, and robust. qa = SimpleQA() answer = qa("What is LightRAG?") -LightRAG + +Why LightRAG? + + +Clarity and Simplicity +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ + + +We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity: + +- We maintain no more than two levels of subclasses. +- Each core abstract class is designed to be robust and flexible. +- We use 10X less code than other libraries to achieve 10X more robustness and flexibility. Here is the printed out structure of ``qa``: @@ -90,63 +102,28 @@ Here is the printed out structure of ``qa``: ) ) +Control and Transparency +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. In default: +- LightRAG simplifies what developers need to send to LLM proprietary APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers. +- LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format -**Why LightRAG?** - - -1. **Clarity and Simplicity** - - We understand that developers building real-world Large Language Model (LLM) applications are the real heroes. Just like AI researchers and engineers who build models on top of PyTorch, developers require **Maximum Flexibility and Customizability**: Each developer has unique data needs to build their own models/components, experiment with In-context Learning (ICL) or model finetuning, and deploy the LLM applications to production. This means the library must provide fundamental lower-level building blocks and strive for clarity and simplicity: - - - We maintain no more than two levels of subclasses. - - Each core abstract class is designed to be robust and flexible. - - We use 10X less code than other libraries to achieve 10X more robustness and flexibility. - - -2. **Control and Transparency** - - Coming from a deep AI research background, we understand that the more control and transparency developers have over their prompts, the better. In default: - - - LightRAG simplifies what developers need to send to LLM proprietary APIs to just two messages each time: a `system message` and a `user message`. This minimizes reliance on and manipulation by API providers. - - LightRAG provides advanced tooling for developers to build `agents`, `tools/function calls`, etc., without relying on any proprietary API provider's 'advanced' features such as `OpenAI` assistant, tools, and JSON format. - -3. 
**Suitted for Both Researchers and Production Engineers** - - On top of the easiness to use, we in particular optimize the configurability of components for researchers to build their solutions and to benchmark existing solutions. - Like how PyTorch has united both researchers and production teams, it enables smooth transition from research to production. - With researchers building on LightRAG, production engineers can easily take over the method and test and iterate on their production data. - Researchers will want their code to be adapted into more products too. - - - - - -**LightRAG library structures as follows:** - -#TODO: One diagram to make people understand lightrag faster - -* `core` - Base abstractions, core functions, and core components like `Generator` and `Embedder` to support more advanced components. -* `components` - Components that are built on top of the core directive. Users will install relevant depencides on their own for some components. - - - - -:doc:`get_started/index` -~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ +It is the future of LLM applications +~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ -:doc:`developer_notes/index` +On top of the easiness to use, we in particular optimize the configurability of components for researchers to build their solutions and to benchmark existing solutions. +Like how PyTorch has united both researchers and production teams, it enables smooth transition from research to production. +With researchers building on LightRAG, production engineers can easily take over the method and test and iterate on their production data. +Researchers will want their code to be adapted into more products too. -:doc:`apis/index` .. toctree:: :glob: :maxdepth: 1 - :caption: New Users :hidden: - get_started/index @@ -155,19 +132,13 @@ Here is the printed out structure of ``qa``: .. toctree:: :glob: :maxdepth: 1 + :hidden: developer_notes/index .. :caption: Tutorials - How each part works .. :hidden: - - - - - - - .. toctree:: :maxdepth: 1 :caption: Use Cases - How different parts are used to build various LLM applications @@ -177,8 +148,8 @@ Here is the printed out structure of ``qa``: .. toctree:: + :glob: :maxdepth: 1 - :caption: API Reference :hidden: apis/index
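
The ``SimpleQA`` component added in ``developer_notes/generator_note.py`` defines both a synchronous ``call`` and an asynchronous ``acall``, but only the synchronous path is exercised at the bottom of that file. A minimal sketch of the async path, assuming the same class, a ``GROQ_API_CLIENT``-compatible ``GROQ_API_KEY`` in the environment, and an illustrative import path (running from ``developer_notes/``):

.. code-block:: python

    import asyncio

    # Illustrative import: assumes this snippet runs from developer_notes/
    from generator_note import SimpleQA


    async def main():
        qa = SimpleQA()
        # acall awaits Generator.acall with the same prompt variable as the sync path
        answer = await qa.acall("What is LightRAG?")
        print(answer)


    asyncio.run(main())

Either path renders the same template; ``acall`` simply awaits ``Generator.acall`` instead of invoking the generator directly.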
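
The ``base_data_class.py`` hunk tightens the metadata check in the ``MyOutputs`` example: with ``and`` and the corrected ``description`` spelling, the warning fires only when a field carries neither a ``desc`` nor a ``description`` key, whereas the old ``or`` against the misspelled key warned on nearly every field. A minimal sketch of a field definition that passes the corrected check, assuming ``DataClass`` is importable from the patched module:

.. code-block:: python

    import warnings
    from dataclasses import dataclass, field, fields

    # Assumed import path, taken from the patched file lightrag/core/base_data_class.py
    from lightrag.core.base_data_class import DataClass


    @dataclass
    class MyOutputs(DataClass):
        # Each field carries a "desc" key, so the corrected check below stays silent
        age: int = field(default=0, metadata={"desc": "The age of the person"})
        name: str = field(default="", metadata={"desc": "The name of the person"})

        def __post_init__(self):
            for f in fields(self):
                # `and` (not `or`) means the warning fires only when both keys are absent
                if "desc" not in f.metadata and "description" not in f.metadata:
                    warnings.warn(
                        f"Class {self.__class__.__name__} Field {f.name} is missing 'desc' in metadata",
                        UserWarning,
                    )


    MyOutputs()  # instantiating runs __post_init__; no warning is emitted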