From 2371bb16bf47b11a905a8548a2389ab0838c84c8 Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Fri, 15 Nov 2024 10:01:15 +0800 Subject: [PATCH 1/5] [deps] update requirements --- .github/workflows/python-publish.yml | 2 +- .github/workflows/python-test.yml | 4 ++-- Dockerfile | 2 +- Install.md | 6 +++--- misc/finetune-demo.ipynb | 2 +- pyproject.toml | 6 +++--- requirements.txt | 6 +++--- 7 files changed, 14 insertions(+), 14 deletions(-) diff --git a/.github/workflows/python-publish.yml b/.github/workflows/python-publish.yml index 8d22aeb..2c21fae 100644 --- a/.github/workflows/python-publish.yml +++ b/.github/workflows/python-publish.yml @@ -25,7 +25,7 @@ jobs: - name: Set up Python uses: actions/setup-python@v3 with: - python-version: '3.11' + python-version: '3.12' - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/.github/workflows/python-test.yml b/.github/workflows/python-test.yml index 4e4ef83..bd04b22 100644 --- a/.github/workflows/python-test.yml +++ b/.github/workflows/python-test.yml @@ -15,10 +15,10 @@ jobs: steps: - uses: actions/checkout@v3 - - name: Set up Python 3.11 + - name: Set up Python 3.12 uses: actions/setup-python@v3 with: - python-version: "3.11" + python-version: "3.12" - name: Install dependencies run: | python -m pip install --upgrade pip diff --git a/Dockerfile b/Dockerfile index b5d47ab..5246f1b 100644 --- a/Dockerfile +++ b/Dockerfile @@ -1,6 +1,6 @@ FROM nvidia/cuda:12.5.1-devel-ubuntu22.04 -ARG PYTHON_VERSION=3.11 +ARG PYTHON_VERSION=3.12 ARG http_proxy ARG https_proxy diff --git a/Install.md b/Install.md index 2e17f11..e88b5c9 100644 --- a/Install.md +++ b/Install.md @@ -65,7 +65,7 @@ MoE-PEFT: NVIDIA CUDA initialized successfully. git clone https://github.com/TUDB-Labs/MoE-PEFT cd moe_peft # Optional but recommended -conda create -n moe_peft python=3.11 +conda create -n moe_peft python=3.12 conda activate moe_peft # Install requirements pip3 install -r requirements.txt --upgrade @@ -116,7 +116,7 @@ MoE-PEFT: NVIDIA CUDA initialized successfully. git clone https://github.com/TUDB-Labs/MoE-PEFT cd moe_peft # Optional but recommended -conda create -n moe_peft python=3.11 +conda create -n moe_peft python=3.12 conda activate moe_peft # Install requirements (CUDA 12.1) pip3 install torch==2.3.1 --index-url https://download.pytorch.org/whl/cu121 @@ -164,7 +164,7 @@ MoE-PEFT: NVIDIA CUDA initialized successfully. 
git clone https://github.com/TUDB-Labs/MoE-PEFT cd moe_peft # Optional but recommended -conda create -n moe_peft python=3.11 +conda create -n moe_peft python=3.12 conda activate moe_peft # Install requirements pip3 install -r requirements.txt --upgrade diff --git a/misc/finetune-demo.ipynb b/misc/finetune-demo.ipynb index 66ee3ba..c6f21eb 100644 --- a/misc/finetune-demo.ipynb +++ b/misc/finetune-demo.ipynb @@ -149,7 +149,7 @@ "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", - "version": "3.11.9" + "version": "3.12.7" } }, "nbformat": 4, diff --git a/pyproject.toml b/pyproject.toml index 5b929f2..7c21987 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -4,7 +4,7 @@ build-backend = "setuptools.build_meta" [project] name = "moe_peft" -version = "2.0.0" +version = "2.0.1" description = "An Efficient LLM Fine-Tuning Factory Optimized for MoE PEFT" readme = "README.md" requires-python = ">=3.8" @@ -14,11 +14,11 @@ classifiers = [ "Operating System :: OS Independent", ] dependencies = [ - "torch>=2.3.0,<2.5.0", + "torch>=2.4.0,<2.6.0", "datasets", "evaluate", "accelerate", - "transformers>=4.44.0,<4.46.0", + "transformers>=4.44.0,<4.47.0", "sentencepiece", "huggingface_hub", "scikit-learn", diff --git a/requirements.txt b/requirements.txt index 88c514b..97dfa58 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,14 +1,14 @@ -torch>=2.3.0,<2.5.0 +torch>=2.4.0,<2.6.0 datasets evaluate accelerate -transformers>=4.44.0,<4.46.0 +transformers>=4.44.0,<4.47.0 sentencepiece huggingface_hub scikit-learn tiktoken mixlora>=0.2.2,<0.3.0 -gradio==4.38.1 +gradio peft==0.11.1 pandas fire From 7a5e36211a83d98081421642533006fe2acfbdf1 Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Mon, 18 Nov 2024 16:37:17 +0800 Subject: [PATCH 2/5] format codes --- generate.py | 4 ++-- moe_peft.py | 4 ++-- moe_peft/evaluator.py | 2 +- tests/dummy_train.py | 4 ++-- 4 files changed, 7 insertions(+), 7 deletions(-) diff --git a/generate.py b/generate.py index ea90992..34e954f 100644 --- a/generate.py +++ b/generate.py @@ -56,9 +56,9 @@ def main( ) for prompt in output[adapter_name]: - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") print(prompt) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") if __name__ == "__main__": diff --git a/moe_peft.py b/moe_peft.py index 1f08794..a70f7eb 100644 --- a/moe_peft.py +++ b/moe_peft.py @@ -212,12 +212,12 @@ def inference( cache_implementation=args.cache_implementation, stream_callback=callback, ) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") print(f"PROMPT: {input_raw}") for adapter_name, output in outputs.items(): print(f"{adapter_name} OUTPUT:") print(output[0]) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") # Main Function diff --git a/moe_peft/evaluator.py b/moe_peft/evaluator.py index 369bfcc..07a0b0c 100644 --- a/moe_peft/evaluator.py +++ b/moe_peft/evaluator.py @@ -184,7 +184,7 @@ def _compute_metrcis(model, current_configs, sequence_lengths, batch_labels, out router_statistic_[idx] += val for idx, val in enumerate(router_statistic_): logging.info( - f"{config.adapter_name}: expert {idx}, load = {val/32}" + f"{config.adapter_name}: expert {idx}, load = {val / 32}" ) batch_size = logits.shape[0] diff --git a/tests/dummy_train.py b/tests/dummy_train.py index bb6b1b8..0b973b2 100644 --- a/tests/dummy_train.py +++ b/tests/dummy_train.py @@ -69,12 +69,12 @@ def main( max_gen_len=128, ) - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") print(f"PROMPT: {test_prompt}\n") for adapter_name, output in outputs.items(): print(f"{adapter_name} 
OUTPUT:") print(f"{output[0]}\n") - print(f"\n{'='*10}\n") + print(f"\n{'=' * 10}\n") if __name__ == "__main__": From 850db5e42260dea8b2938d1accc89ded4203907b Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Mon, 18 Nov 2024 16:37:26 +0800 Subject: [PATCH 3/5] update notebooks --- misc/finetune-demo.ipynb | 10 +- misc/inference-demo.ipynb | 422 ++++++++++++++++++++++++++++++++++++++ 2 files changed, 428 insertions(+), 4 deletions(-) create mode 100644 misc/inference-demo.ipynb diff --git a/misc/finetune-demo.ipynb b/misc/finetune-demo.ipynb index c6f21eb..df376d0 100644 --- a/misc/finetune-demo.ipynb +++ b/misc/finetune-demo.ipynb @@ -4,7 +4,7 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory Optimized for MoE PEFT\n", + "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", "[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", "[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", "[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", @@ -12,13 +12,15 @@ "[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", "[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", "\n", - "MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA) developed by the [IDs Lab](https://ids-lab-asia.github.io) at Sichuan University. It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as LoRA, DoRA, MixLoRA, and others. Key features of MoE-PEFT include:\n", + "MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). 
Key features of MoE-PEFT include:\n", "\n", - "- Concurrent fine-tuning of multiple adapters with a shared pre-trained model.\n", + "- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", + "\n", + "- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", "\n", "- Support for multiple PEFT algorithms and various pre-trained models.\n", "\n", - "- MoE PEFT optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA).\n", + "- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", "\n", "## About this notebook\n", "\n", diff --git a/misc/inference-demo.ipynb b/misc/inference-demo.ipynb new file mode 100644 index 0000000..8e4e6ef --- /dev/null +++ b/misc/inference-demo.ipynb @@ -0,0 +1,422 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", + "[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", + "[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", + "[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", + "[![](https://img.shields.io/pypi/v/moe_peft?logo=pypi)](https://pypi.org/project/moe_peft/)\n", + "[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", + "[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", + "\n", + "MoE-PEFT is an open-source *LLMOps* framework built on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). Key features of MoE-PEFT include:\n", + "\n", + "- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", + "\n", + "- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", + "\n", + "- Support for multiple PEFT algorithms and various pre-trained models.\n", + "\n", + "- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", + "\n", + "## About this notebook\n", + "\n", + "This is a simple jupiter notebook for showcasing the basic process of building chatbot with TinyLLaMA." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clone and install MoE-PEFT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip uninstall torchvision torchaudio -y\n", + "! pip install moe_peft" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [ + { + "data": { + "application/vnd.jupyter.widget-view+json": { + "model_id": "365a13c3d8654e51ad894b8459a5297c", + "version_major": 2, + "version_minor": 0 + }, + "text/plain": [ + "Loading checkpoint shards: 0%| | 0/2 [00:00🙋‍♂️
\"\n", + " + prompt\n", + " + \"
\"\n", + " )\n", + " text = text.replace(\"•\", \" *\")\n", + " text = textwrap.indent(text, \"> \", predicate=lambda _: True)\n", + " formatted_text = \"🤖\\n\\n\" + text + \"\\n\"\n", + " return Markdown(formatted_prompt + formatted_text)\n", + "\n", + "\n", + "def to_markdown(text):\n", + " text = text.replace(\"•\", \" *\")\n", + " return Markdown(textwrap.indent(text, \"> \", predicate=lambda _: True))\n", + "\n", + "\n", + "class ChatState:\n", + " \"\"\"\n", + " Manages the conversation history for a turn-based chatbot\n", + " Follows the turn-based conversation guidelines for the Gemma family of models\n", + " documented at https://ai.google.dev/gemma/docs/formatting\n", + " \"\"\"\n", + "\n", + " __START_TURN_USER__ = \"user\\n\"\n", + " __START_TURN_MODEL__ = \"model\\n\"\n", + " __END_TURN__ = \"\"\n", + "\n", + " def __init__(\n", + " self,\n", + " model: moe_peft.LLMModel,\n", + " tokenizer: moe_peft.Tokenizer,\n", + " gen_config: moe_peft.GenerateConfig,\n", + " system: str = \"\",\n", + " ):\n", + " \"\"\"\n", + " Initializes the chat state.\n", + "\n", + " Args:\n", + " model: The language model to use for generating responses.\n", + " system: (Optional) System instructions or bot description.\n", + " \"\"\"\n", + " self.model = model\n", + " self.tokenizer = tokenizer\n", + " self.gen_config = gen_config\n", + " self.system = system\n", + " self.history = []\n", + "\n", + " def add_to_history_as_user(self, message):\n", + " \"\"\"\n", + " Adds a user message to the history with start/end turn markers.\n", + " \"\"\"\n", + " self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__ + \"\\n\")\n", + "\n", + " def add_to_history_as_model(self, message):\n", + " \"\"\"\n", + " Adds a model response to the history with start/end turn markers.\n", + " \"\"\"\n", + " self.history.append(self.__START_TURN_MODEL__ + message)\n", + "\n", + " def get_history(self):\n", + " \"\"\"\n", + " Returns the entire chat history as a single string.\n", + " \"\"\"\n", + " return \"\".join([*self.history])\n", + "\n", + " def get_full_prompt(self):\n", + " \"\"\"\n", + " Builds the prompt for the language model, including history and system description.\n", + " \"\"\"\n", + " prompt = self.get_history() + self.__START_TURN_MODEL__\n", + " if len(self.system) > 0:\n", + " prompt = self.system + \"\\n\" + prompt\n", + " return prompt\n", + "\n", + " def send_message(self, message):\n", + " \"\"\"\n", + " Handles sending a user message and getting a model response.\n", + "\n", + " Args:\n", + " message: The user's message.\n", + "\n", + " Returns:\n", + " The model's response.\n", + " \"\"\"\n", + " self.add_to_history_as_user(message)\n", + " prompt = self.get_full_prompt()\n", + " self.gen_config.prompts = [prompt]\n", + " response = moe_peft.generate(\n", + " self.model, self.tokenizer, [self.gen_config], max_gen_len=2048\n", + " )[self.gen_config.adapter_name][0]\n", + " result = response.replace(prompt, \"\").replace(self.__END_TURN__, \"\") # Extract only the new response\n", + " self.add_to_history_as_model(result)\n", + " return result" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Chat with the model" + ] + }, + { + "cell_type": "code", + "execution_count": 12, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Tell me, in a few words, how to compute all prime numbers up to 1000?
🤖\n", + "\n", + "> Sieve of Eratosthenes.\n", + "> \n", + "> **Explanation:** This is an efficient algorithm for finding prime numbers. \n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 12, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "chat = ChatState(model, tokenizer, gen_config)\n", + "message = \"Tell me, in a few words, how to compute all prime numbers up to 1000?\"\n", + "display_chat(message, chat.send_message(message))" + ] + }, + { + "cell_type": "code", + "execution_count": 13, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Now in Python! No numpy, please!
🤖\n", + "\n", + "> ```python\n", + "> def sieve_of_eratosthenes(n):\n", + "> \"\"\"Returns a list of prime numbers up to n.\"\"\"\n", + "> primes = [True] * (n + 1)\n", + "> primes[0] = primes[1] = False\n", + "> \n", + "> for i in range(2, int(n ** 0.5) + 1):\n", + "> if primes[i]:\n", + "> for j in range(i*i, n + 1, i):\n", + "> primes[j] = False\n", + "> \n", + "> return [i for i, is_prime in enumerate(primes) if is_prime]\n", + "> \n", + "> print(sieve_of_eratosthenes(1000))\n", + "> ```\n", + "> \n", + "> \n", + "> **Explanation:**\n", + "> \n", + "> 1. **Initialization:** A boolean list `primes` is created with size `n+1`, representing potential primes from 0 to n. Initially, both 0 and 1 are marked as non-primes.\n", + "> 2. **Iteration:** The loop starts from 2 up to the square root of `n`. We only need to check divisors up to the square root because any composite number has a prime factor less than or equal to its square root. \n", + "> 3. **Marking Non-Primes:** For each prime `i`, its multiples starting from `i*i` are marked as non-primes in the `primes` array.\n", + "> 4. **Returning Primes:** Finally, we create a new list by filtering the `primes` array using list comprehension, keeping only those indices where `primes[i]` is True, indicating that the corresponding index corresponds to a prime number. \n", + "> \n", + "> \n", + "> \n", + "> Let me know if you have any further questions or need additional explanations.\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 13, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "message = \"Now in Python! No numpy, please!\"\n", + "display_chat(message, chat.send_message(message))" + ] + }, + { + "cell_type": "code", + "execution_count": 14, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Thank you, it works! Can you explain the code in French?
🤖\n", + "\n", + "> La fonction `sieve_of_eratosthenes(n)` renvoie une liste de nombres premiers jusqu'à `n`.\n", + "> \n", + "> ### Explication:\n", + "> La fonction utilise la méthode de Sieve d’Erathostène pour trouver les nombres premiers. Voici comment ça marche: \n", + "> \n", + "> 1. **Initialisation**: \n", + "> - On crée une liste booléenne `primes` de taille `n+1` représentant des nombres potentiels qui sont premiers. Les éléments initiaux sont tous définis comme `True`. Nous assignons les valeurs `False` à 0 et 1 car ils ne sont pas premiers.\n", + "> \n", + "> 2. **Itération**: \n", + "> - On commence par le nombre 2 jusqu'au carré root de `n` (inclusif). Cela signifie que nous allons vérifier seulement les diviseurs potentiels jusqu'à la racine carrée de `n`. \n", + "> 3. **Marquer les non-premiers**: \n", + "> - Pour chaque nombre premier `i`, on vérifie ses multiples (commençant par `i*i`) en plaçant les valeurs de `primes[j]` à `False`.\n", + "> \n", + "> \n", + "> 4. **Retourner les nombres premiers**: \n", + "> - Enfin, un nouvel array est construit en filtrant la liste `primes` en utilisant la syntaxe de liste comprehension. On sélectionne uniquement les indices correspondant à des nombres premiers où `primes[i]` est `True`. \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> \n", + "> J'espère que cela vous aide à comprendre la fonction plus clairement! Si vous avez des questions supplémentaires, n'hésitez pas à les poser. \n", + "\n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 14, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "message = \"Thank you, it works! Can you explain the code in French?\"\n", + "display_chat(message, chat.send_message(message))" + ] + }, + { + "cell_type": "code", + "execution_count": 15, + "metadata": {}, + "outputs": [ + { + "data": { + "text/markdown": [ + "🙋‍♂️
Great! Now add those explanations as comments in the code.
🤖\n", + "\n", + "> ```python\n", + "> def sieve_of_eratosthenes(n):\n", + "> \"\"\"Returns a list of prime numbers up to n.\"\"\"\n", + "> #Initialize a boolean list 'primes' with size n+1 to represent all possible prime numbers.\n", + "> primes = [True] * (n + 1) \n", + "> primes[0] = primes[1] = False # 0 and 1 are not primes.\n", + "> \n", + "> # Iterate through the integers from 2 up to the square root of n.\n", + "> for i in range(2, int(n ** 0.5) + 1): \n", + "> if primes[i]:\n", + "> # If the current integer 'i' is prime\n", + "> # Mark all multiples of 'i' as non-prime by setting their corresponding value in 'primes' to False\n", + "> for j in range(i * i, n + 1, i): \n", + "> primes[j] = False \n", + "> \n", + "> #Filter the 'primes' list to obtain a list of prime numbers\n", + "> return [i for i, is_prime in enumerate(primes) if is_prime]\n", + "> \n", + "> print(sieve_of_eratosthenes(1000))\n", + "> \n", + "> ```\n", + "> \n", + "> \n", + "> \n", + "> \n", + "> Let me know if you would like further details or clarifications! 😄 \n", + "" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 15, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "message = \"Great! Now add those explanations as comments in the code.\"\n", + "display_chat(message, chat.send_message(message))" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "moe_peft", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} From af42022ca0c7f7c263ef6c52f54f8da95a65c640 Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Mon, 18 Nov 2024 16:39:13 +0800 Subject: [PATCH 4/5] format code --- misc/inference-demo.ipynb | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/misc/inference-demo.ipynb b/misc/inference-demo.ipynb index 8e4e6ef..b48dfc0 100644 --- a/misc/inference-demo.ipynb +++ b/misc/inference-demo.ipynb @@ -109,9 +109,7 @@ "\n", "def display_chat(prompt, text):\n", " formatted_prompt = (\n", - " \"🙋‍♂️
\"\n", - " + prompt\n", - " + \"
\"\n", + " \"🙋‍♂️
\" + prompt + \"
\"\n", " )\n", " text = text.replace(\"•\", \" *\")\n", " text = textwrap.indent(text, \"> \", predicate=lambda _: True)\n", @@ -159,7 +157,9 @@ " \"\"\"\n", " Adds a user message to the history with start/end turn markers.\n", " \"\"\"\n", - " self.history.append(self.__START_TURN_USER__ + message + self.__END_TURN__ + \"\\n\")\n", + " self.history.append(\n", + " self.__START_TURN_USER__ + message + self.__END_TURN__ + \"\\n\"\n", + " )\n", "\n", " def add_to_history_as_model(self, message):\n", " \"\"\"\n", @@ -198,7 +198,9 @@ " response = moe_peft.generate(\n", " self.model, self.tokenizer, [self.gen_config], max_gen_len=2048\n", " )[self.gen_config.adapter_name][0]\n", - " result = response.replace(prompt, \"\").replace(self.__END_TURN__, \"\") # Extract only the new response\n", + " result = response.replace(prompt, \"\").replace(\n", + " self.__END_TURN__, \"\"\n", + " ) # Extract only the new response\n", " self.add_to_history_as_model(result)\n", " return result" ] From f6c29dff26904dafa78ca7f349e125c7e9c9b35a Mon Sep 17 00:00:00 2001 From: Mike Lee Date: Thu, 21 Nov 2024 14:58:27 +0800 Subject: [PATCH 5/5] add mixlora notebook --- misc/finetune-demo.ipynb | 16 +++- misc/mixlora-demo.ipynb | 166 ++++++++++++++++++++++++++++++++++ moe_peft/__init__.py | 2 + moe_peft/adapters/__init__.py | 8 ++ tests/dummy_train.py | 28 +++--- tests/dummy_train_mixlora.py | 89 ++++++++++++++++++ 6 files changed, 293 insertions(+), 16 deletions(-) create mode 100644 misc/mixlora-demo.ipynb create mode 100644 tests/dummy_train_mixlora.py diff --git a/misc/finetune-demo.ipynb b/misc/finetune-demo.ipynb index df376d0..80b4e96 100644 --- a/misc/finetune-demo.ipynb +++ b/misc/finetune-demo.ipynb @@ -86,12 +86,18 @@ "metadata": {}, "outputs": [], "source": [ - "lora_config = moe_peft.LoraConfig(\n", + "lora_config = moe_peft.adapter_factory(\n", + " peft_type=\"LORA\",\n", " adapter_name=\"lora_0\",\n", - " lora_r_=32,\n", - " lora_alpha_=64,\n", - " lora_dropout_=0.05,\n", - " target_modules_={\"q_proj\": True, \"k_proj\": True, \"v_proj\": True, \"o_proj\": True},\n", + " r=8,\n", + " lora_alpha=16,\n", + " lora_dropout=0.05,\n", + " target_modules=[\n", + " \"q_proj\",\n", + " \"k_proj\",\n", + " \"v_proj\",\n", + " \"o_proj\",\n", + " ],\n", ")\n", "\n", "model.init_adapter(lora_config)\n", diff --git a/misc/mixlora-demo.ipynb b/misc/mixlora-demo.ipynb new file mode 100644 index 0000000..b828021 --- /dev/null +++ b/misc/mixlora-demo.ipynb @@ -0,0 +1,166 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# MoE-PEFT: An Efficient LLM Fine-Tuning Factory for Mixture of Expert (MoE) Parameter-Efficient Fine-Tuning.\n", + "[![](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml/badge.svg)](https://github.com/TUDB-Labs/MoE-PEFT/actions/workflows/python-test.yml)\n", + "[![](https://img.shields.io/github/stars/TUDB-Labs/MoE-PEFT?logo=GitHub&style=flat)](https://github.com/TUDB-Labs/MoE-PEFT/stargazers)\n", + "[![](https://img.shields.io/github/v/release/TUDB-Labs/MoE-PEFT?logo=Github)](https://github.com/TUDB-Labs/MoE-PEFT/releases/latest)\n", + "[![](https://img.shields.io/pypi/v/moe_peft?logo=pypi)](https://pypi.org/project/moe_peft/)\n", + "[![](https://img.shields.io/docker/v/mikecovlee/moe_peft?logo=Docker&label=docker)](https://hub.docker.com/r/mikecovlee/moe_peft/tags)\n", + "[![](https://img.shields.io/github/license/TUDB-Labs/MoE-PEFT)](http://www.apache.org/licenses/LICENSE-2.0)\n", + "\n", + "MoE-PEFT is an open-source *LLMOps* framework built 
on [m-LoRA](https://github.com/TUDB-Labs/mLoRA). It is designed for high-throughput fine-tuning, evaluation, and inference of Large Language Models (LLMs) using techniques such as MoE + Others (like LoRA, DoRA). Key features of MoE-PEFT include:\n", + "\n", + "- Concurrent fine-tuning, evaluation, and inference of multiple adapters with a shared pre-trained model.\n", + "\n", + "- **MoE PEFT** optimization, mainly for [MixLoRA](https://github.com/TUDB-Labs/MixLoRA) and other MoLE implementation.\n", + "\n", + "- Support for multiple PEFT algorithms and various pre-trained models.\n", + "\n", + "- Seamless integration with the [HuggingFace](https://huggingface.co) ecosystem.\n", + "\n", + "## About this notebook\n", + "\n", + "This is a simple jupiter notebook for showcasing the basic process of building MixLoRA MoE model from TinyLLaMA by fine-tuning with dummy data." + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Clone and install MoE-PEFT" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "! pip uninstall torchvision torchaudio -y\n", + "! pip install moe_peft" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Loading the base model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import torch\n", + "\n", + "import moe_peft\n", + "\n", + "moe_peft.setup_logging(\"INFO\")\n", + "\n", + "base_model = \"TinyLlama/TinyLlama_v1.1\"\n", + "\n", + "model = moe_peft.LLMModel.from_pretrained(\n", + " base_model,\n", + " device=moe_peft.executor.default_device_name(),\n", + " load_dtype=torch.bfloat16,\n", + ")\n", + "tokenizer = moe_peft.Tokenizer(base_model)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Training a dummy LoRA adapter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "lora_config = moe_peft.adapter_factory(\n", + " peft_type=\"MIXLORA\",\n", + " adapter_name=\"mixlora_0\",\n", + " r=8,\n", + " lora_alpha=16,\n", + " lora_dropout=0.05,\n", + " target_modules=[\n", + " \"up_proj\",\n", + " \"down_proj\",\n", + " \"gate_proj\",\n", + " ],\n", + " routing_strategy=\"mixlora\",\n", + " num_experts=6,\n", + ")\n", + "\n", + "model.init_adapter(lora_config)\n", + "\n", + "train_config = moe_peft.TrainConfig(\n", + " adapter_name=\"mixlora_0\",\n", + " data_path=\"TUDB-Labs/Dummy-MoE-PEFT\",\n", + " num_epochs=10,\n", + " batch_size=16,\n", + " micro_batch_size=8,\n", + " learning_rate=1e-4,\n", + ")\n", + "\n", + "moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Validate the effectiveness of LoRA adapter" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "generate_config = moe_peft.GenerateConfig(\n", + " adapter_name=\"mixlora_0\",\n", + " prompts=[\"Could you provide an introduction to MoE-PEFT?\"],\n", + " stop_token=\"\\n\",\n", + ")\n", + "\n", + "output = moe_peft.generate(\n", + " model=model, tokenizer=tokenizer, configs=[generate_config], max_gen_len=128\n", + ")\n", + "\n", + "print(output[\"mixlora_0\"][0])" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "moe_peft", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + 
"file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.7" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +} diff --git a/moe_peft/__init__.py b/moe_peft/__init__.py index a659493..ab8e116 100644 --- a/moe_peft/__init__.py +++ b/moe_peft/__init__.py @@ -1,3 +1,4 @@ +from .adapters import adapter_factory from .common import ( AdapterConfig, LLMBatchConfig, @@ -36,6 +37,7 @@ "LLMModelInput", "AdapterConfig", "LoraConfig", + "adapter_factory", "TrainTask", "Dispatcher", "EvaluateConfig", diff --git a/moe_peft/adapters/__init__.py b/moe_peft/adapters/__init__.py index 7c21615..8fa73cf 100644 --- a/moe_peft/adapters/__init__.py +++ b/moe_peft/adapters/__init__.py @@ -62,6 +62,13 @@ def lora_config_factory(config: Dict[str, any]) -> LoraConfig: return config_class.from_config(config).check() +def adapter_factory(peft_type: str, adapter_name: str, **kwargs) -> LoraConfig: + kwargs["peft_type"] = peft_type + config = lora_config_factory(kwargs) + config.adapter_name = adapter_name + return config + + def router_loss_factory(config: MixLoraConfig) -> torch.nn.Module: if config.routing_strategy_ not in router_loss_dict: return None @@ -101,4 +108,5 @@ def moe_layer_factory( "lora_config_factory", "router_loss_factory", "moe_layer_factory", + "adapter_factory", ] diff --git a/tests/dummy_train.py b/tests/dummy_train.py index 0b973b2..9900064 100644 --- a/tests/dummy_train.py +++ b/tests/dummy_train.py @@ -5,10 +5,11 @@ def main( - base_model: str, + base_model: str = "TinyLlama/TinyLlama_v1.1", adapter_name: str = "lora_0", train_data: str = "TUDB-Labs/Dummy-MoE-PEFT", test_prompt: str = "Could you provide an introduction to MoE-PEFT?", + save_path: str = None, ): moe_peft.setup_logging("INFO") @@ -19,17 +20,18 @@ def main( ) tokenizer = moe_peft.Tokenizer(base_model) - lora_config = moe_peft.LoraConfig( + lora_config = moe_peft.adapter_factory( + peft_type="LORA", adapter_name=adapter_name, - lora_r_=32, - lora_alpha_=64, - lora_dropout_=0.05, - target_modules_={ - "q_proj": True, - "k_proj": True, - "v_proj": True, - "o_proj": True, - }, + r=8, + lora_alpha=16, + lora_dropout=0.05, + target_modules=[ + "q_proj", + "k_proj", + "v_proj", + "o_proj", + ], ) train_config = moe_peft.TrainConfig( @@ -44,6 +46,10 @@ def main( with moe_peft.executors.no_cache(): model.init_adapter(lora_config) moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config]) + if save_path: + moe_peft.trainer.save_adapter_weight( + model=model, config=train_config, path=save_path + ) lora_config, lora_weight = model.unload_adapter(adapter_name) generate_configs = [ diff --git a/tests/dummy_train_mixlora.py b/tests/dummy_train_mixlora.py new file mode 100644 index 0000000..57b80f0 --- /dev/null +++ b/tests/dummy_train_mixlora.py @@ -0,0 +1,89 @@ +import fire +import torch + +import moe_peft +import moe_peft.adapters + + +def main( + base_model: str = "TinyLlama/TinyLlama_v1.1", + adapter_name: str = "mixlora_0", + train_data: str = "TUDB-Labs/Dummy-MoE-PEFT", + test_prompt: str = "Could you provide an introduction to MoE-PEFT?", + save_path: str = None, +): + moe_peft.setup_logging("INFO") + + model: moe_peft.LLMModel = moe_peft.LLMModel.from_pretrained( + base_model, + device=moe_peft.executor.default_device_name(), + load_dtype=torch.bfloat16, + ) + tokenizer = moe_peft.Tokenizer(base_model) + + lora_config = moe_peft.adapter_factory( + peft_type="MIXLORA", + adapter_name=adapter_name, + r=8, + lora_alpha=16, + 
lora_dropout=0.05, + target_modules=[ + "up_proj", + "down_proj", + "gate_proj", + ], + routing_strategy="mixlora", + num_experts=6, + ) + + train_config = moe_peft.TrainConfig( + adapter_name=adapter_name, + data_path=train_data, + num_epochs=10, + batch_size=16, + micro_batch_size=8, + learning_rate=1e-4, + ) + + with moe_peft.executors.no_cache(): + model.init_adapter(lora_config) + moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config]) + if save_path: + moe_peft.trainer.save_adapter_weight( + model=model, config=train_config, path=save_path + ) + lora_config, lora_weight = model.unload_adapter(adapter_name) + + generate_configs = [ + moe_peft.GenerateConfig( + adapter_name=adapter_name, + prompts=[test_prompt], + stop_token="\n", + ), + moe_peft.GenerateConfig( + adapter_name="default", + prompts=[test_prompt], + stop_token="\n", + ), + ] + + with moe_peft.executors.no_cache(): + model.init_adapter(lora_config, lora_weight) + model.init_adapter(moe_peft.AdapterConfig(adapter_name="default")) + outputs = moe_peft.generate( + model=model, + tokenizer=tokenizer, + configs=generate_configs, + max_gen_len=128, + ) + + print(f"\n{'=' * 10}\n") + print(f"PROMPT: {test_prompt}\n") + for adapter_name, output in outputs.items(): + print(f"{adapter_name} OUTPUT:") + print(f"{output[0]}\n") + print(f"\n{'=' * 10}\n") + + +if __name__ == "__main__": + fire.Fire(main)
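
Reviewer note: the snippet below is a condensed usage sketch of the `adapter_factory` entry point introduced in patch 5/5, assembled from `misc/mixlora-demo.ipynb` and `tests/dummy_train_mixlora.py` in this series. The base model name, dataset, and hyperparameters are the demo defaults used in those files, not requirements of the API.

    import torch

    import moe_peft

    moe_peft.setup_logging("INFO")

    # Demo default from this series; any base model supported by MoE-PEFT should work.
    base_model = "TinyLlama/TinyLlama_v1.1"

    model = moe_peft.LLMModel.from_pretrained(
        base_model,
        device=moe_peft.executor.default_device_name(),
        load_dtype=torch.bfloat16,
    )
    tokenizer = moe_peft.Tokenizer(base_model)

    # Build a MixLoRA adapter config through the new factory function
    # exported from moe_peft.adapters in this series.
    lora_config = moe_peft.adapter_factory(
        peft_type="MIXLORA",
        adapter_name="mixlora_0",
        r=8,
        lora_alpha=16,
        lora_dropout=0.05,
        target_modules=["up_proj", "down_proj", "gate_proj"],
        routing_strategy="mixlora",
        num_experts=6,
    )
    model.init_adapter(lora_config)

    # Fine-tune on the dummy dataset used by the demo notebook.
    train_config = moe_peft.TrainConfig(
        adapter_name="mixlora_0",
        data_path="TUDB-Labs/Dummy-MoE-PEFT",
        num_epochs=10,
        batch_size=16,
        micro_batch_size=8,
        learning_rate=1e-4,
    )
    moe_peft.train(model=model, tokenizer=tokenizer, configs=[train_config])

    # Generate with the freshly trained adapter.
    generate_config = moe_peft.GenerateConfig(
        adapter_name="mixlora_0",
        prompts=["Could you provide an introduction to MoE-PEFT?"],
        stop_token="\n",
    )
    output = moe_peft.generate(
        model=model, tokenizer=tokenizer, configs=[generate_config], max_gen_len=128
    )
    print(output["mixlora_0"][0])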