From 2371bb16bf47b11a905a8548a2389ab0838c84c8 Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Fri, 15 Nov 2024 10:01:15 +0800 Subject: [PATCH 1/5] [deps] update requirements --- .github/workflows/python-publish.yml | 2 +- .github/workflows/python-test.yml | 4 ++-- Dockerfile | 2 +- Install.md | 6 +++--- misc/finetune-demo.ipynb | 2 +- pyproject.toml | 6 +++--- requirements.txt | 6 +++--- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8d22aeb..2c21fae 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -25,7 +25,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: '3.11' + python-version: '3.12' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 4e4ef83..bd04b22 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -15,10 +15,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@v3 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/Dockerfile b/Dockerfile index b5d47ab..5246f1b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM nvidia/cuda:12.5.1-devel-ubuntu22.04 -ARG PYTHON_VERSION=3.11 +ARG PYTHON_VERSION=3.12 ARG http_proxy ARG https_proxy diff --git a/Install.md b/Install.md index 2e17f11..e88b5c9 100644 --- a/Install.md +++ b/Install.md @@ -65,7 +65,7 @@ MoE-PEFT: NVIDIA CUDA initialized successfully. git clone https://github.com/TUDB-Labs/MoE-PEFT cd moe_peft # Optional but recommended -conda create -n moe_peft python=3.11 +conda create -n moe_peft python=3.12 conda activate moe_peft # Install requirements pip3 install -r requirements.txt --upgrade @@ -116,7 +116,7 @@ MoE-PEFT: NVIDIA CUDA initialized successfully. git clone https://github.com/TUDB-Labs/MoE-PEFT cd moe_peft # Optional but recommended -conda create -n moe_peft python=3.11 +conda create -n moe_peft python=3.12 conda activate moe_peft # Install requirements (CUDA 12.1) pip3 install torch==2.3.1 --index-url https://download.pytorch.org/whl/cu121 @@ -164,7 +164,7 @@ MoE-PEFT: NVIDIA CUDA initialized successfully. 
git clone https://github.com/TUDB-Labs/MoE-PEFT cd moe_peft # Optional but recommended -conda create -n moe_peft python=3.11 +conda create -n moe_peft python=3.12 conda activate moe_peft # Install requirements pip3 install -r requirements.txt --upgrade diff --git a/misc/finetune-demo.ipynb b/misc/finetune-demo.ipynb index 66ee3ba..c6f21eb 100644 --- a/misc/finetune-demo.ipynb +++ b/misc/finetune-demo.ipynb @@ -149,7 +149,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 5b929f2..7c21987 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "moe_peft" -version = "2.0.0" +version = "2.0.1" description = "An Efficient LLM Fine-Tuning Factory Optimized for MoE PEFT" readme = "README.md" requires-python = ">=3.8" @@ -14,11 +14,11 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "torch>=2.3.0,<2.5.0", + "torch>=2.4.0,<2.6.0", "datasets", "evaluate", "accelerate", - "transformers>=4.44.0,<4.46.0", + "transformers>=4.44.0,<4.47.0", "sentencepiece", "huggingface_hub", "scikit-learn", diff --git a/requirements.txt b/requirements.txt index 88c514b..97dfa58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -torch>=2.3.0,<2.5.0 +torch>=2.4.0,<2.6.0 datasets evaluate accelerate -transformers>=4.44.0,<4.46.0 +transformers>=4.44.0,<4.47.0 sentencepiece huggingface_hub scikit-learn tiktoken mixlora>=0.2.2,<0.3.0 -gradio==4.38.1 +gradio peft==0.11.1 pandas fire From 7a5e36211a83d98081421642533006fe2acfbdf1 Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Mon, 18 Nov 2024 16:37:17 +0800 Subject: [PATCH 2/5] format codes --- generate.py | 4 ++-- moe_peft.py | 4 ++-- moe_peft/evaluator.py | 2 +- tests/dummy_train.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/generate.py b/generate.py index ea90992..34e954f 100644 --- a/generate.py +++ b/generate.py @@ -56,9 +56,9 @@ def main( ) for prompt in output[adapter_name]: - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") print(prompt) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") if __name__ == "__main__": diff --git a/moe_peft.py b/moe_peft.py index 1f08794..a70f7eb 100644 --- a/moe_peft.py +++ b/moe_peft.py @@ -212,12 +212,12 @@ def inference( cache_implementation=args.cache_implementation, stream_callback=callback, ) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") print(f"PROMPT: {input_raw}") for adapter_name, output in outputs.items(): print(f"{adapter_name} OUTPUT:") print(output[0]) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") # Main Function diff --git a/moe_peft/evaluator.py b/moe_peft/evaluator.py index 369bfcc..07a0b0c 100644 --- a/moe_peft/evaluator.py +++ b/moe_peft/evaluator.py @@ -184,7 +184,7 @@ def _compute_metrcis(model, current_configs, sequence_lengths, batch_labels, out router_statistic_[idx] += val for idx, val in enumerate(router_statistic_): logging.info( - f"{config.adapter_name}: expert {idx}, load = {val/32}" + f"{config.adapter_name}: expert {idx}, load = {val / 32}" ) batch_size = logits.shape[0] diff --git a/tests/dummy_train.py b/tests/dummy_train.py index bb6b1b8..0b973b2 100644 --- a/tests/dummy_train.py +++ b/tests/dummy_train.py @@ -69,12 +69,12 @@ def main( max_gen_len=128, ) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") print(f"PROMPT: {test_prompt}\n") for adapter_name, output in outputs.items(): print(f"{adapter_name} 
OUTPUT:") print(f"{output[0]}\n") - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") if __name__ == "__main__": From 850db5e42260dea8b2938d1accc89ded4203907b Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Mon, 18 Nov 2024 16:37:26 +0800 Subject: [PATCH 3/5] update notebooks --- misc/finetune-demo.ipynb | 10 +- misc/inference-demo.ipynb | 422 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 428 insertions(+), 4 deletions(-) create mode 100644 misc/inference-demo.ipynb diff --git a/misc/finetune-demo.ipynb b/misc/finetune-demo.ipynb index c6f21eb..df376d0 100644 --- a/misc/finetune-demo.ipynb +++ b/misc/finetune-demo.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory Optimized for MoE PEFT\n", + "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", "[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", "[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", "[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", @@ -12,13 +12,15 @@ "[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", "[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", "\n", - "MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA) developed by the [IDs Lab](https://ids-lab-asia.github.io) at Sichuan University. It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as LoRA, DoRA, MixLoRA, and others. Key features of MoE-PEFT include:\n", + "MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). 
Key features of MoE-PEFT include:\n", "\n", - "- Concurrent fine-tuning of multiple adapters with a shared pre-trained model.\n", + "- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", + "\n", + "- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", "\n", "- Support for multiple PEFT algorithms and various pre-trained models.\n", "\n", - "- MoE PEFT optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA).\n", + "- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", "\n", "## About this notebook\n", "\n", diff --git a/misc/inference-demo.ipynb b/misc/inference-demo.ipynb new file mode 100644 index 0000000..8e4e6ef --- /dev/null +++ b/misc/inference-demo.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", + "[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", + "[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", + "[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", + "[![](https://img.shields.io/pypi/v/moe_peft?logo=pypi)](https://pypi.org/project/moe_peft/)\n", + "[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", + "[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", + "\n", + "MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). Key features of MoE-PEFT include:\n", + "\n", + "- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", + "\n", + "- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", + "\n", + "- Support for multiple PEFT algorithms and various pre-trained models.\n", + "\n", + "- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", + "\n", + "## About this notebook\n", + "\n", + "This is a simple jupiter notebook for showcasing the basic process of building chatbot with TinyLLaMA." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clone and install MoE-PEFT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip uninstall torchvision torchaudio -y\n", + "! pip install moe_peft" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "365a13c3d8654e51ad894b8459a5297c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00🙋‍♂️
\"\n", + " + prompt\n", + " + \"
\"\n", + " )\n", + " text = text.replace(\"•\", \" *\")\n", + " text = textwrap.indent(text, \"> \", predicate=lambda _: True)\n", + " formatted_text = \"🤖\\n\\n\" + text + \"\\n\"\n", + " return Markdown(formatted_prompt + formatted_text)\n", + "\n", + "\n", + "def to_markdown(text):\n", + " text = text.replace(\"•\", \" *\")\n", + " return Markdown(textwrap.indent(text, \"> \", predicate=lambda _: True))\n", + "\n", + "\n", + "class ChatState:\n", + " \"\"\"\n", + " Manages the conversation history for a turn-based chatbot\n", + " Follows the turn-based conversation guidelines for the Gemma family of models\n", + " documented at https://ai.google.dev/gemma/docs/formatting\n", + " \"\"\"\n", + "\n", + " __START_TURN_USER__ = \"user\\n\"\n", + " __START_TURN_MODEL__ = \"model\\n\"\n", + " __END_TURN__ = \"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " model: moe_peft.LLMModel,\n", + " tokenizer: moe_peft.Tokenizer,\n", + " gen_config: moe_peft.GenerateConfig,\n", + " system: str = \"\",\n", + " ):\n", + " \"\"\"\n", + " Initializes the chat state.\n", + "\n", + " Args:\n", + " model: The language model to use for generating responses.\n", + " system: (Optional) System instructions or bot description.\n", + " \"\"\"\n", + " self.model = model\n", + " self.tokenizer = tokenizer\n", + " self.gen_config = gen_config\n", + " self.system = system\n", + " self.history = []\n", + "\n", + " def add_to_history_as_user(self, message):\n", + " \"\"\"\n", + " Adds a user message to the history with start/end turn markers.\n", + " \"\"\"\n", + " self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__ + \"\\n\")\n", + "\n", + " def add_to_history_as_model(self, message):\n", + " \"\"\"\n", + " Adds a model response to the history with start/end turn markers.\n", + " \"\"\"\n", + " self.history.append(self.__START_TURN_MODEL__ + message)\n", + "\n", + " def get_history(self):\n", + " \"\"\"\n", + " Returns the entire chat history as a single string.\n", + " \"\"\"\n", + " return \"\".join([*self.history])\n", + "\n", + " def get_full_prompt(self):\n", + " \"\"\"\n", + " Builds the prompt for the language model, including history and system description.\n", + " \"\"\"\n", + " prompt = self.get_history() + self.__START_TURN_MODEL__\n", + " if len(self.system) > 0:\n", + " prompt = self.system + \"\\n\" + prompt\n", + " return prompt\n", + "\n", + " def send_message(self, message):\n", + " \"\"\"\n", + " Handles sending a user message and getting a model response.\n", + "\n", + " Args:\n", + " message: The user's message.\n", + "\n", + " Returns:\n", + " The model's response.\n", + " \"\"\"\n", + " self.add_to_history_as_user(message)\n", + " prompt = self.get_full_prompt()\n", + " self.gen_config.prompts = [prompt]\n", + " response = moe_peft.generate(\n", + " self.model, self.tokenizer, [self.gen_config], max_gen_len=2048\n", + " )[self.gen_config.adapter_name][0]\n", + " result = response.replace(prompt, \"\").replace(self.__END_TURN__, \"\") # Extract only the new response\n", + " self.add_to_history_as_model(result)\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chat with the model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Tell me, in a few words, how to compute all prime numbers up to 1000?
🤖\n", + "\n", + "> Sieve of Eratosthenes.\n", + "> \n", + "> **Explanation:** This is an efficient algorithm for finding prime numbers. \n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat = ChatState(model, tokenizer, gen_config)\n", + "message = \"Tell me, in a few words, how to compute all prime numbers up to 1000?\"\n", + "display_chat(message, chat.send_message(message))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Now in Python! No numpy, please!
🤖\n", + "\n", + "> ```python\n", + "> def sieve_of_eratosthenes(n):\n", + "> \"\"\"Returns a list of prime numbers up to n.\"\"\"\n", + "> primes = [True] * (n + 1)\n", + "> primes[0] = primes[1] = False\n", + "> \n", + "> for i in range(2, int(n ** 0.5) + 1):\n", + "> if primes[i]:\n", + "> for j in range(i*i, n + 1, i):\n", + "> primes[j] = False\n", + "> \n", + "> return [i for i, is_prime in enumerate(primes) if is_prime]\n", + "> \n", + "> print(sieve_of_eratosthenes(1000))\n", + "> ```\n", + "> \n", + "> \n", + "> **Explanation:**\n", + "> \n", + "> 1. **Initialization:** A boolean list `primes` is created with size `n+1`, representing potential primes from 0 to n. Initially, both 0 and 1 are marked as non-primes.\n", + "> 2. **Iteration:** The loop starts from 2 up to the square root of `n`. We only need to check divisors up to the square root because any composite number has a prime factor less than or equal to its square root. \n", + "> 3. **Marking Non-Primes:** For each prime `i`, its multiples starting from `i*i` are marked as non-primes in the `primes` array.\n", + "> 4. **Returning Primes:** Finally, we create a new list by filtering the `primes` array using list comprehension, keeping only those indices where `primes[i]` is True, indicating that the corresponding index corresponds to a prime number. \n", + "> \n", + "> \n", + "> \n", + "> Let me know if you have any further questions or need additional explanations.\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "message = \"Now in Python! No numpy, please!\"\n", + "display_chat(message, chat.send_message(message))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Thank you, it works! Can you explain the code in French?
🤖\n", + "\n", + "> La fonction `sieve_of_eratosthenes(n)` renvoie une liste de nombres premiers jusqu'à `n`.\n", + "> \n", + "> ### Explication:\n", + "> La fonction utilise la méthode de Sieve d’Erathostène pour trouver les nombres premiers. Voici comment ça marche: \n", + "> \n", + "> 1. **Initialisation**: \n", + "> - On crée une liste booléenne `primes` de taille `n+1` représentant des nombres potentiels qui sont premiers. Les éléments initiaux sont tous définis comme `True`. Nous assignons les valeurs `False` à 0 et 1 car ils ne sont pas premiers.\n", + "> \n", + "> 2. **Itération**: \n", + "> - On commence par le nombre 2 jusqu'au carré root de `n` (inclusif). Cela signifie que nous allons vérifier seulement les diviseurs potentiels jusqu'à la racine carrée de `n`. \n", + "> 3. **Marquer les non-premiers**: \n", + "> - Pour chaque nombre premier `i`, on vérifie ses multiples (commençant par `i*i`) en plaçant les valeurs de `primes[j]` à `False`.\n", + "> \n", + "> \n", + "> 4. **Retourner les nombres premiers**: \n", + "> - Enfin, un nouvel array est construit en filtrant la liste `primes` en utilisant la syntaxe de liste comprehension. On sélectionne uniquement les indices correspondant à des nombres premiers où `primes[i]` est `True`. \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> J'espère que cela vous aide à comprendre la fonction plus clairement! Si vous avez des questions supplémentaires, n'hésitez pas à les poser. \n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "message = \"Thank you, it works! Can you explain the code in French?\"\n", + "display_chat(message, chat.send_message(message))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Great! Now add those explanations as comments in the code.
🤖\n", + "\n", + "> ```python\n", + "> def sieve_of_eratosthenes(n):\n", + "> \"\"\"Returns a list of prime numbers up to n.\"\"\"\n", + "> #Initialize a boolean list 'primes' with size n+1 to represent all possible prime numbers.\n", + "> primes = [True] * (n + 1) \n", + "> primes[0] = primes[1] = False # 0 and 1 are not primes.\n", + "> \n", + "> # Iterate through the integers from 2 up to the square root of n.\n", + "> for i in range(2, int(n ** 0.5) + 1): \n", + "> if primes[i]:\n", + "> # If the current integer 'i' is prime\n", + "> # Mark all multiples of 'i' as non-prime by setting their corresponding value in 'primes' to False\n", + "> for j in range(i * i, n + 1, i): \n", + "> primes[j] = False \n", + "> \n", + "> #Filter the 'primes' list to obtain a list of prime numbers\n", + "> return [i for i, is_prime in enumerate(primes) if is_prime]\n", + "> \n", + "> print(sieve_of_eratosthenes(1000))\n", + "> \n", + "> ```\n", + "> \n", + "> \n", + "> \n", + "> \n", + "> Let me know if you would like further details or clarifications! 😄 \n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "message = \"Great! Now add those explanations as comments in the code.\"\n", + "display_chat(message, chat.send_message(message))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "moe_peft", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From af42022ca0c7f7c263ef6c52f54f8da95a65c640 Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Mon, 18 Nov 2024 16:39:13 +0800 Subject: [PATCH 4/5] format code --- misc/inference-demo.ipynb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/misc/inference-demo.ipynb b/misc/inference-demo.ipynb index 8e4e6ef..b48dfc0 100644 --- a/misc/inference-demo.ipynb +++ b/misc/inference-demo.ipynb @@ -109,9 +109,7 @@ "\n", "def display_chat(prompt, text):\n", " formatted_prompt = (\n", - " \"🙋‍♂️
\"\n", - " + prompt\n", - " + \"
\"\n", + " \"🙋‍♂️
\" + prompt + \"
\"\n", " )\n", " text = text.replace(\"•\", \" *\")\n", " text = textwrap.indent(text, \"> \", predicate=lambda _: True)\n", @@ -159,7 +157,9 @@ " \"\"\"\n", " Adds a user message to the history with start/end turn markers.\n", " \"\"\"\n", - " self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__ + \"\\n\")\n", + " self.history.append(\n", + " self.__START_TURN_USER__ + message + self.__END_TURN__ + \"\\n\"\n", + " )\n", "\n", " def add_to_history_as_model(self, message):\n", " \"\"\"\n", @@ -198,7 +198,9 @@ " response = moe_peft.generate(\n", " self.model, self.tokenizer, [self.gen_config], max_gen_len=2048\n", " )[self.gen_config.adapter_name][0]\n", - " result = response.replace(prompt, \"\").replace(self.__END_TURN__, \"\") # Extract only the new response\n", + " result = response.replace(prompt, \"\").replace(\n", + " self.__END_TURN__, \"\"\n", + " ) # Extract only the new response\n", " self.add_to_history_as_model(result)\n", " return result" ] From f6c29dff26904dafa78ca7f349e125c7e9c9b35a Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Thu, 21 Nov 2024 14:58:27 +0800 Subject: [PATCH 5/5] add mixlora notebook --- misc/finetune-demo.ipynb | 16 +++- misc/mixlora-demo.ipynb | 166 ++++++++++++++++++++++++++++++++++ moe_peft/__init__.py | 2 + moe_peft/adapters/__init__.py | 8 ++ tests/dummy_train.py | 28 +++--- tests/dummy_train_mixlora.py | 89 ++++++++++++++++++ 6 files changed, 293 insertions(+), 16 deletions(-) create mode 100644 misc/mixlora-demo.ipynb create mode 100644 tests/dummy_train_mixlora.py diff --git a/misc/finetune-demo.ipynb b/misc/finetune-demo.ipynb index df376d0..80b4e96 100644 --- a/misc/finetune-demo.ipynb +++ b/misc/finetune-demo.ipynb @@ -86,12 +86,18 @@ "metadata": {}, "outputs": [], "source": [ - "lora_config = moe_peft.LoraConfig(\n", + "lora_config = moe_peft.adapter_factory(\n", + " peft_type=\"LORA\",\n", " adapter_name=\"lora_0\",\n", - " lora_r_=32,\n", - " lora_alpha_=64,\n", - " lora_dropout_=0.05,\n", - " target_modules_={\"q_proj\": True, \"k_proj\": True, \"v_proj\": True, \"o_proj\": True},\n", + " r=8,\n", + " lora_alpha=16,\n", + " lora_dropout=0.05,\n", + " target_modules=[\n", + " \"q_proj\",\n", + " \"k_proj\",\n", + " \"v_proj\",\n", + " \"o_proj\",\n", + " ],\n", ")\n", "\n", "model.init_adapter(lora_config)\n", diff --git a/misc/mixlora-demo.ipynb b/misc/mixlora-demo.ipynb new file mode 100644 index 0000000..b828021 --- /dev/null +++ b/misc/mixlora-demo.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", + "[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", + "[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", + "[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", + "[![](https://img.shields.io/pypi/v/moe_peft?logo=pypi)](https://pypi.org/project/moe_peft/)\n", + "[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", + "[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", + "\n", + "MoE-PEFT is an open-source *LLMOps* framework built 
on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). Key features of MoE-PEFT include:\n", + "\n", + "- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", + "\n", + "- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", + "\n", + "- Support for multiple PEFT algorithms and various pre-trained models.\n", + "\n", + "- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", + "\n", + "## About this notebook\n", + "\n", + "This is a simple jupiter notebook for showcasing the basic process of building MixLoRA MoE model from TinyLLaMA by fine-tuning with dummy data." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clone and install MoE-PEFT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip uninstall torchvision torchaudio -y\n", + "! pip install moe_peft" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the base model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "import moe_peft\n", + "\n", + "moe_peft.setup_logging(\"INFO\")\n", + "\n", + "base_model = \"TinyLlama/TinyLlama_v1.1\"\n", + "\n", + "model = moe_peft.LLMModel.from_pretrained(\n", + " base_model,\n", + " device=moe_peft.executor.default_device_name(),\n", + " load_dtype=torch.bfloat16,\n", + ")\n", + "tokenizer = moe_peft.Tokenizer(base_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training a dummy LoRA adapter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lora_config = moe_peft.adapter_factory(\n", + " peft_type=\"MIXLORA\",\n", + " adapter_name=\"mixlora_0\",\n", + " r=8,\n", + " lora_alpha=16,\n", + " lora_dropout=0.05,\n", + " target_modules=[\n", + " \"up_proj\",\n", + " \"down_proj\",\n", + " \"gate_proj\",\n", + " ],\n", + " routing_strategy=\"mixlora\",\n", + " num_experts=6,\n", + ")\n", + "\n", + "model.init_adapter(lora_config)\n", + "\n", + "train_config = moe_peft.TrainConfig(\n", + " adapter_name=\"mixlora_0\",\n", + " data_path=\"TUDB-Labs/Dummy-MoE-PEFT\",\n", + " num_epochs=10,\n", + " batch_size=16,\n", + " micro_batch_size=8,\n", + " learning_rate=1e-4,\n", + ")\n", + "\n", + "moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validate the effectiveness of LoRA adapter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "generate_config = moe_peft.GenerateConfig(\n", + " adapter_name=\"mixlora_0\",\n", + " prompts=[\"Could you provide an introduction to MoE-PEFT?\"],\n", + " stop_token=\"\\n\",\n", + ")\n", + "\n", + "output = moe_peft.generate(\n", + " model=model, tokenizer=tokenizer, configs=[generate_config], max_gen_len=128\n", + ")\n", + "\n", + "print(output[\"mixlora_0\"][0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "moe_peft", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/moe_peft/__init__.py b/moe_peft/__init__.py index a659493..ab8e116 100644 --- a/moe_peft/__init__.py +++ b/moe_peft/__init__.py @@ -1,3 +1,4 @@ +from .adapters import adapter_factory from .common import ( AdapterConfig, LLMBatchConfig, @@ -36,6 +37,7 @@ "LLMModelInput", "AdapterConfig", "LoraConfig", + "adapter_factory", "TrainTask", "Dispatcher", "EvaluateConfig", diff --git a/moe_peft/adapters/__init__.py b/moe_peft/adapters/__init__.py index 7c21615..8fa73cf 100644 --- a/moe_peft/adapters/__init__.py +++ b/moe_peft/adapters/__init__.py @@ -62,6 +62,13 @@ def lora_config_factory(config: Dict[str, any]) -> LoraConfig: return config_class.from_config(config).check() +def adapter_factory(peft_type: str, adapter_name: str, **kwargs) -> LoraConfig: + kwargs["peft_type"] = peft_type + config = lora_config_factory(kwargs) + config.adapter_name = adapter_name + return config + + def router_loss_factory(config: MixLoraConfig) -> torch.nn.Module: if config.routing_strategy_ not in router_loss_dict: return None @@ -101,4 +108,5 @@ def moe_layer_factory( "lora_config_factory", "router_loss_factory", "moe_layer_factory", + "adapter_factory", ] diff --git a/tests/dummy_train.py b/tests/dummy_train.py index 0b973b2..9900064 100644 --- a/tests/dummy_train.py +++ b/tests/dummy_train.py @@ -5,10 +5,11 @@ def main( - base_model: str, + base_model: str = "TinyLlama/TinyLlama_v1.1", adapter_name: str = "lora_0", train_data: str = "TUDB-Labs/Dummy-MoE-PEFT", test_prompt: str = "Could you provide an introduction to MoE-PEFT?", + save_path: str = None, ): moe_peft.setup_logging("INFO") @@ -19,17 +20,18 @@ def main( ) tokenizer = moe_peft.Tokenizer(base_model) - lora_config = moe_peft.LoraConfig( + lora_config = moe_peft.adapter_factory( + peft_type="LORA", adapter_name=adapter_name, - lora_r_=32, - lora_alpha_=64, - lora_dropout_=0.05, - target_modules_={ - "q_proj": True, - "k_proj": True, - "v_proj": True, - "o_proj": True, - }, + r=8, + lora_alpha=16, + lora_dropout=0.05, + target_modules=[ + "q_proj", + "k_proj", + "v_proj", + "o_proj", + ], ) train_config = moe_peft.TrainConfig( @@ -44,6 +46,10 @@ def main( with moe_peft.executors.no_cache(): model.init_adapter(lora_config) moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config]) + if save_path: + moe_peft.trainer.save_adapter_weight( + model=model, config=train_config, path=save_path + ) lora_config, lora_weight = model.unload_adapter(adapter_name) generate_configs = [ diff --git a/tests/dummy_train_mixlora.py b/tests/dummy_train_mixlora.py new file mode 100644 index 0000000..57b80f0 --- /dev/null +++ b/tests/dummy_train_mixlora.py @@ -0,0 +1,89 @@ +import fire +import torch + +import moe_peft +import moe_peft.adapters + + +def main( + base_model: str = "TinyLlama/TinyLlama_v1.1", + adapter_name: str = "mixlora_0", + train_data: str = "TUDB-Labs/Dummy-MoE-PEFT", + test_prompt: str = "Could you provide an introduction to MoE-PEFT?", + save_path: str = None, +): + moe_peft.setup_logging("INFO") + + model: moe_peft.LLMModel = moe_peft.LLMModel.from_pretrained( + base_model, + device=moe_peft.executor.default_device_name(), + load_dtype=torch.bfloat16, + ) + tokenizer = moe_peft.Tokenizer(base_model) + + lora_config = moe_peft.adapter_factory( + peft_type="MIXLORA", + adapter_name=adapter_name, + r=8, + lora_alpha=16, + 
lora_dropout=0.05, + target_modules=[ + "up_proj", + "down_proj", + "gate_proj", + ], + routing_strategy="mixlora", + num_experts=6, + ) + + train_config = moe_peft.TrainConfig( + adapter_name=adapter_name, + data_path=train_data, + num_epochs=10, + batch_size=16, + micro_batch_size=8, + learning_rate=1e-4, + ) + + with moe_peft.executors.no_cache(): + model.init_adapter(lora_config) + moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config]) + if save_path: + moe_peft.trainer.save_adapter_weight( + model=model, config=train_config, path=save_path + ) + lora_config, lora_weight = model.unload_adapter(adapter_name) + + generate_configs = [ + moe_peft.GenerateConfig( + adapter_name=adapter_name, + prompts=[test_prompt], + stop_token="\n", + ), + moe_peft.GenerateConfig( + adapter_name="default", + prompts=[test_prompt], + stop_token="\n", + ), + ] + + with moe_peft.executors.no_cache(): + model.init_adapter(lora_config, lora_weight) + model.init_adapter(moe_peft.AdapterConfig(adapter_name="default")) + outputs = moe_peft.generate( + model=model, + tokenizer=tokenizer, + configs=generate_configs, + max_gen_len=128, + ) + + print(f"\n{'=' * 10}\n") + print(f"PROMPT: {test_prompt}\n") + for adapter_name, output in outputs.items(): + print(f"{adapter_name} OUTPUT:") + print(f"{output[0]}\n") + print(f"\n{'=' * 10}\n") + + +if __name__ == "__main__": + fire.Fire(main)
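
Reviewer note: the snippet below is a condensed usage sketch of the `adapter_factory` entry point introduced in patch 5/5, assembled from `misc/mixlora-demo.ipynb` and `tests/dummy_train_mixlora.py` in this series. The base model name, dataset, and hyperparameters are the demo defaults used in those files, not requirements of the API.

    import torch

    import moe_peft

    moe_peft.setup_logging("INFO")

    # Demo default from this series; any base model supported by MoE-PEFT should work.
    base_model = "TinyLlama/TinyLlama_v1.1"

    model = moe_peft.LLMModel.from_pretrained(
        base_model,
        device=moe_peft.executor.default_device_name(),
        load_dtype=torch.bfloat16,
    )
    tokenizer = moe_peft.Tokenizer(base_model)

    # Build a MixLoRA adapter config through the new factory function
    # exported from moe_peft.adapters in this series.
    lora_config = moe_peft.adapter_factory(
        peft_type="MIXLORA",
        adapter_name="mixlora_0",
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["up_proj", "down_proj", "gate_proj"],
        routing_strategy="mixlora",
        num_experts=6,
    )
    model.init_adapter(lora_config)

    # Fine-tune on the dummy dataset used by the demo notebook.
    train_config = moe_peft.TrainConfig(
        adapter_name="mixlora_0",
        data_path="TUDB-Labs/Dummy-MoE-PEFT",
        num_epochs=10,
        batch_size=16,
        micro_batch_size=8,
        learning_rate=1e-4,
    )
    moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])

    # Generate with the freshly trained adapter.
    generate_config = moe_peft.GenerateConfig(
        adapter_name="mixlora_0",
        prompts=["Could you provide an introduction to MoE-PEFT?"],
        stop_token="\n",
    )
    output = moe_peft.generate(
        model=model, tokenizer=tokenizer, configs=[generate_config], max_gen_len=128
    )
    print(output["mixlora_0"][0])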