From a263c4ba834221fb47f64873b1bb454f8ded59b2 Mon Sep 17 00:00:00 2001 From: Thomas Capelle Date: Wed, 29 May 2024 16:03:38 +0200 Subject: [PATCH] clean up --- colabs/peft/llama_token_cls.ipynb | 946 ++++++++++++++---------------- 1 file changed, 442 insertions(+), 504 deletions(-) diff --git a/colabs/peft/llama_token_cls.ipynb b/colabs/peft/llama_token_cls.ipynb index 2dd44d58..55d68a90 100644 --- a/colabs/peft/llama_token_cls.ipynb +++ b/colabs/peft/llama_token_cls.ipynb @@ -1,507 +1,445 @@ { - "nbformat": 4, - "nbformat_minor": 0, - "metadata": { - "colab": { - "provenance": [], - "collapsed_sections": [ - "qQZXezFvGvgg", - "BhOagCyPTiwO", - "wV-xDETxODhA", - "u3sWpjEuRpwV" - ], - "toc_visible": true, - "gpuType": "T4", - "private_outputs": true, - "cell_execution_strategy": "setup", - "include_colab_link": true - }, - "kernelspec": { - "name": "python3", - "display_name": "Python 3" - }, - "language_info": { - "name": "python" - }, - "accelerator": "GPU" + "cells": [ + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "\"Open\n" + ] }, - "cells": [ - { - "cell_type": "markdown", - "metadata": { - "id": "view-in-github", - "colab_type": "text" - }, - "source": [ - "\"Open" - ] - }, - { - "cell_type": "markdown", - "source": [ - "![](llama_img.png)" - ], - "metadata": { - "id": "jY57rlNA8rn_" - } - }, - { - "cell_type": "markdown", - "source": [ - "## 📦 Packages and Basic Setup\n", - "---\n", - "\n", - "To run the notebooks you'll need two secrets named `W&B` and `HF_TOKEN`. Also, in the configuration section change the `wandb_entity` to your username/workspace." - ], - "metadata": { - "id": "guHo1NzrGc33" - } - }, - { - "cell_type": "code", - "source": [ - "%%capture\n", - "!pip install -q -U bitsandbytes datasets evaluate ml-collections seqeval wandb\n", - "!pip install -q git+https://github.com/huggingface/peft.git" - ], - "metadata": { - "id": "YoK6kYdVEjmI" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "zIMX8IAYFytA" - }, - "outputs": [], - "source": [ - "import evaluate\n", - "import numpy as np\n", - "from transformers import AutoTokenizer\n", - "from datasets import ClassLabel, load_dataset\n", - "from transformers import TrainingArguments, Trainer\n", - "from peft import get_peft_model, LoraConfig, TaskType\n", - "from transformers import DataCollatorForTokenClassification" - ] - }, - { - "cell_type": "code", - "source": [ - "import wandb\n", - "wandb.login()" - ], - "metadata": { - "id": "uV4ifIYHTxsa" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "# @title ⚙️ Configuration\n", - "\n", - "import ml_collections\n", - "\n", - "def get_config() -> ml_collections.ConfigDict:\n", - " config = ml_collections.ConfigDict()\n", - " config.model = \"unsloth/llama-2-7b-bnb-4bit\" # @param {type: \"string\"}\n", - " config.lora_r = 4 # @param {type: \"number\"}\n", - " config.lora_alpha = 32 # @param {type: \"number\"}\n", - " config.lora_dropout = 0.1 # @param {type: \"number\"}\n", - " config.max_length = 32 # @param {type: \"number\"}\n", - " config.batch_size = 16 # @param {type: \"number\"}\n", - " config.num_epochs = 5 # @param {type: \"number\"}\n", - " config.learning_rate = 1e-3 # @param {type: \"number\"}\n", - " config.dataset = \"conll2003\" # @param {type: \"string\"}\n", - " config.wandb_entity = None # @param {type: \"string\"}\n", - " return config\n", - "\n", - "config = get_config()" - ], - "metadata": { - "cellView": "form", 
- "id": "xGpvXtooGe5c" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "import os\n", - "import wandb\n", - "\n", - "wandb.init(\n", - " project=\"Llama-NER\",\n", - " job_type=\"train\",\n", - " group=config.model,\n", - " config = config.to_dict(),\n", - " entity=config.wandb_entity,\n", - ")\n", - "\n", - "os.environ[\"WANDB_WATCH\"]=\"false\"\n", - "os.environ[\"WANDB_LOG_MODEL\"]=\"false\"\n", - "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" - ], - "metadata": { - "id": "unwtlHx-UFSY" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 💿 The Dataset\n", - "---" - ], - "metadata": { - "id": "qQZXezFvGvgg" - } - }, - { - "cell_type": "code", - "source": [ - "%%capture\n", - "ds = load_dataset(\n", - " config.dataset,\n", - " cache_dir=\"/cache/\",\n", - ")\n", - "\n", - "seqeval = evaluate.load(\"seqeval\")" - ], - "metadata": { - "id": "N4L9dLPJGynQ" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "column_names = ds[\"train\"].column_names\n", - "features = ds[\"train\"].features\n", - "\n", - "text_column_name = \"tokens\"\n", - "label_column_name = \"ner_tags\"\n", - "\n", - "label_list = features[label_column_name].feature.names\n", - "label2id = {i: i for i in range(len(label_list))}\n", - "id2label = {v: k for k, v in label2id.items()}" - ], - "metadata": { - "id": "WfyZrcMQH9gu" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 🖖 Utility Functions\n", - "---" - ], - "metadata": { - "id": "BhOagCyPTiwO" - } - }, - { - "cell_type": "code", - "source": [ - "def compute_metrics(p):\n", - " predictions, labels = p\n", - " predictions = np.argmax(predictions, axis=2)\n", - "\n", - " true_predictions = [\n", - " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", - " for prediction, label in zip(predictions, labels)\n", - " ]\n", - " true_labels = [\n", - " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", - " for prediction, label in zip(predictions, labels)\n", - " ]\n", - "\n", - " results = seqeval.compute(predictions=true_predictions, references=true_labels)\n", - " return {\n", - " \"precision\": results[\"overall_precision\"],\n", - " \"recall\": results[\"overall_recall\"],\n", - " \"f1\": results[\"overall_f1\"],\n", - " \"accuracy\": results[\"overall_accuracy\"],\n", - " }" - ], - "metadata": { - "id": "f03prfWbTxsK" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 🏠 Model Architecture\n", - "---" - ], - "metadata": { - "id": "_fPQvJkJLV1B" - } - }, - { - "cell_type": "markdown", - "source": [ - "### Implementating `LlamaForTokenClassification`\n", - "\n", - "[Source: @KoichiYasuoka](https://github.com/huggingface/transformers/issues/26521#issuecomment-1868284434)" - ], - "metadata": { - "id": "wV-xDETxODhA" - } - }, - { - "cell_type": "code", - "source": [ - "%%capture\n", - "from typing import List, Optional, Tuple, Union\n", - "import torch\n", - "from torch import nn\n", - "from transformers.modeling_outputs import TokenClassifierOutput\n", - "from transformers.file_utils import add_start_docstrings_to_model_forward\n", - "from transformers.models.llama.modeling_llama import LlamaModel, LlamaPreTrainedModel, LLAMA_INPUTS_DOCSTRING\n", - "\n", - "class LlamaForTokenClassification(LlamaPreTrainedModel):\n", - " def __init__(self, config):\n", - " super().__init__(config)\n", - " 
self.num_labels = config.num_labels\n", - " self.model = LlamaModel(config)\n", - " if hasattr(config, \"classifier_dropout\") and config.classifier_dropout is not None:\n", - " classifier_dropout = config.classifier_dropout\n", - " elif hasattr(config, \"hidden_dropout\") and config.hidden_dropout is not None:\n", - " classifier_dropout = config.hidden_dropout\n", - " else:\n", - " classifier_dropout = 0.1\n", - " self.dropout = nn.Dropout(classifier_dropout)\n", - " self.classifier = nn.Linear(config.hidden_size, config.num_labels)\n", - "\n", - " # Initialize weights and apply final processing\n", - " self.post_init()\n", - "\n", - " def get_input_embeddings(self):\n", - " return self.model.embed_tokens\n", - "\n", - " def set_input_embeddings(self, value):\n", - " self.model.embed_tokens = value\n", - "\n", - " @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)\n", - " def forward(\n", - " self,\n", - " input_ids: Optional[torch.LongTensor] = None,\n", - " attention_mask: Optional[torch.Tensor] = None,\n", - " position_ids: Optional[torch.LongTensor] = None,\n", - " past_key_values: Optional[List[torch.FloatTensor]] = None,\n", - " inputs_embeds: Optional[torch.FloatTensor] = None,\n", - " labels: Optional[torch.LongTensor] = None,\n", - " use_cache: Optional[bool] = None,\n", - " output_attentions: Optional[bool] = None,\n", - " output_hidden_states: Optional[bool] = None,\n", - " return_dict: Optional[bool] = None,\n", - " ) -> Union[Tuple, TokenClassifierOutput]:\n", - "\n", - " return_dict = return_dict if return_dict is not None else self.config.use_return_dict\n", - "\n", - " transformer_outputs = self.model(\n", - " input_ids,\n", - " attention_mask=attention_mask,\n", - " position_ids=position_ids,\n", - " past_key_values=past_key_values,\n", - " inputs_embeds=inputs_embeds,\n", - " use_cache=use_cache,\n", - " output_attentions=output_attentions,\n", - " output_hidden_states=output_hidden_states,\n", - " return_dict=return_dict,\n", - " )\n", - "\n", - " hidden_states = transformer_outputs[0]\n", - " hidden_states = self.dropout(hidden_states)\n", - " logits = self.classifier(hidden_states)\n", - "\n", - " loss = None\n", - " if labels is not None:\n", - " labels = labels.to(logits.device)\n", - " loss_fct = nn.CrossEntropyLoss()\n", - " loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))\n", - "\n", - " if not return_dict:\n", - " output = (logits,) + transformer_outputs[2:]\n", - " return ((loss,) + output) if loss is not None else output\n", - "\n", - " return TokenClassifierOutput(\n", - " loss=loss,\n", - " logits=logits,\n", - " hidden_states=transformer_outputs.hidden_states,\n", - " attentions=transformer_outputs.attentions\n", - " )\n", - "\n", - "tokenizer = AutoTokenizer.from_pretrained(config.model)\n", - "\n", - "model = LlamaForTokenClassification.from_pretrained(\n", - " config.model,\n", - " num_labels=len(label_list),\n", - " id2label=id2label,\n", - " label2id=label2id,\n", - " cache_dir=\"/cache/\",\n", - ")" - ], - "metadata": { - "id": "O7fqyqpnMY3m" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Process Dataset for Token Classification" - ], - "metadata": { - "id": "u3sWpjEuRpwV" - } - }, - { - "cell_type": "code", - "source": [ - "def tokenize_and_align_labels(examples):\n", - " tokenized_inputs = tokenizer(examples[\"tokens\"], is_split_into_words=True, padding='longest', max_length=config.max_length, truncation=True)\n", - "\n", - " labels = []\n", - " for i, label in 
enumerate(examples[f\"ner_tags\"]):\n", - " word_ids = tokenized_inputs.word_ids(batch_index=i) # Map tokens to their respective word.\n", - " previous_word_idx = None\n", - " label_ids = []\n", - " for word_idx in word_ids: # Set the special tokens to -100.\n", - " if word_idx is None:\n", - " label_ids.append(-100)\n", - " elif word_idx != previous_word_idx: # Only label the first token of a given word.\n", - " label_ids.append(label[word_idx])\n", - " else:\n", - " label_ids.append(-100)\n", - " previous_word_idx = word_idx\n", - " labels.append(label_ids)\n", - "\n", - " tokenized_inputs[\"labels\"] = labels\n", - " return tokenized_inputs\n", - "\n", - "tokenized_ds = ds.map(tokenize_and_align_labels, batched=True)\n", - "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)" - ], - "metadata": { - "id": "rvlLnwEHRsOF" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "### Obtaining 🤗 PEFT Model" - ], - "metadata": { - "id": "Xi8EfJTNQZ50" - } - }, - { - "cell_type": "code", - "source": [ - "peft_config = LoraConfig(\n", - " task_type=TaskType.TOKEN_CLS,\n", - " inference_mode=False,\n", - " r=config.lora_r,\n", - " lora_alpha=config.lora_alpha,\n", - " lora_dropout=config.lora_dropout\n", - ")\n", - "\n", - "model = get_peft_model(model, peft_config)\n", - "model.print_trainable_parameters()" - ], - "metadata": { - "id": "7CrzHHifLYym" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## ✍️ Training\n", - "---" - ], - "metadata": { - "id": "3pcAz6LISPEy" - } - }, - { - "cell_type": "code", - "source": [ - "training_args = TrainingArguments(\n", - " output_dir=\"unsloth-llama-2-7b-bnb-4bit-conll2003\",\n", - " learning_rate=config.learning_rate,\n", - " gradient_accumulation_steps=2,\n", - " per_device_train_batch_size=config.batch_size,\n", - " per_device_eval_batch_size=config.batch_size,\n", - " num_train_epochs=config.num_epochs,\n", - " logging_steps=100,\n", - " weight_decay=0.01,\n", - " evaluation_strategy=\"epoch\",\n", - " save_strategy=\"epoch\",\n", - " report_to=[\"wandb\"],\n", - " optim=\"paged_adamw_8bit\",\n", - " load_best_model_at_end=True,\n", - " push_to_hub=True,\n", - ")\n", - "\n", - "trainer = Trainer(\n", - " model=model,\n", - " args=training_args,\n", - " train_dataset=tokenized_ds[\"train\"],\n", - " eval_dataset=tokenized_ds[\"test\"],\n", - " tokenizer=tokenizer,\n", - " data_collator=data_collator,\n", - " compute_metrics=compute_metrics,\n", - ")\n", - "\n", - "train_results = trainer.train()" - ], - "metadata": { - "id": "XzWxRp27SQz9" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "code", - "source": [ - "wandb.config.train_results = train_results\n", - "wandb.finish()" - ], - "metadata": { - "id": "7rORnBvUVea1" - }, - "execution_count": null, - "outputs": [] - }, - { - "cell_type": "markdown", - "source": [ - "## 📚 References\n", - "\n", - "* Github: [`4AI/LS-LLaMA`](https://github.com/4AI/LS-LLaMA)\n", - "* [Alpaca + Llama 7b example by `@unslothai`](https://colab.research.google.com/drive/1lBzz5KeZJKXjvivbYvmGarix9Ao6Wxe5?usp=sharing)" - ], - "metadata": { - "id": "GYHkmefyPTOQ" - } - } - ] + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "![](llama_img.png)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 📦 Packages and Basic Setup\n", + "---\n", + "\n", + "To run the notebooks you'll need two secrets named `W&B` and `HF_TOKEN`. 
Also, in the configuration section change the `wandb_entity` to your username/workspace." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "!pip install -q -U bitsandbytes datasets evaluate ml-collections seqeval wandb\n", + "!pip install -q git+https://github.com/huggingface/peft.git" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import evaluate\n", + "import numpy as np\n", + "from transformers import AutoTokenizer\n", + "from datasets import ClassLabel, load_dataset\n", + "from transformers import TrainingArguments, Trainer\n", + "from peft import get_peft_model, LoraConfig, TaskType\n", + "from transformers import DataCollatorForTokenClassification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import wandb\n", + "wandb.login()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# @title ⚙️ Configuration\n", + "\n", + "import ml_collections\n", + "\n", + "def get_config() -> ml_collections.ConfigDict:\n", + " config = ml_collections.ConfigDict()\n", + " config.model = \"unsloth/llama-2-7b-bnb-4bit\" # @param {type: \"string\"}\n", + " config.lora_r = 4 # @param {type: \"number\"}\n", + " config.lora_alpha = 32 # @param {type: \"number\"}\n", + " config.lora_dropout = 0.1 # @param {type: \"number\"}\n", + " config.max_length = 32 # @param {type: \"number\"}\n", + " config.batch_size = 16 # @param {type: \"number\"}\n", + " config.num_epochs = 5 # @param {type: \"number\"}\n", + " config.learning_rate = 1e-3 # @param {type: \"number\"}\n", + " config.dataset = \"conll2003\" # @param {type: \"string\"}\n", + " config.wandb_entity = None # @param {type: \"string\"}\n", + " return config\n", + "\n", + "config = get_config()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "import os\n", + "import wandb\n", + "\n", + "wandb.init(\n", + " project=\"Llama-NER\",\n", + " job_type=\"train\",\n", + " group=config.model,\n", + " config = config.to_dict(),\n", + " entity=config.wandb_entity,\n", + ")\n", + "\n", + "os.environ[\"WANDB_WATCH\"]=\"false\"\n", + "os.environ[\"WANDB_LOG_MODEL\"]=\"false\"\n", + "os.environ[\"CUDA_VISIBLE_DEVICES\"] = \"0\"" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 💿 The Dataset\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "ds = load_dataset(\n", + " config.dataset,\n", + " cache_dir=\"/cache/\",\n", + ")\n", + "\n", + "seqeval = evaluate.load(\"seqeval\")" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "column_names = ds[\"train\"].column_names\n", + "features = ds[\"train\"].features\n", + "\n", + "text_column_name = \"tokens\"\n", + "label_column_name = \"ner_tags\"\n", + "\n", + "label_list = features[label_column_name].feature.names\n", + "label2id = {i: i for i in range(len(label_list))}\n", + "id2label = {v: k for k, v in label2id.items()}" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🖖 Utility Functions\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def compute_metrics(p):\n", + " predictions, labels = p\n", + 
" predictions = np.argmax(predictions, axis=2)\n", + "\n", + " true_predictions = [\n", + " [label_list[p] for (p, l) in zip(prediction, label) if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + " true_labels = [\n", + " [label_list[l] for (p, l) in zip(prediction, label) if l != -100]\n", + " for prediction, label in zip(predictions, labels)\n", + " ]\n", + "\n", + " results = seqeval.compute(predictions=true_predictions, references=true_labels)\n", + " return {\n", + " \"precision\": results[\"overall_precision\"],\n", + " \"recall\": results[\"overall_recall\"],\n", + " \"f1\": results[\"overall_f1\"],\n", + " \"accuracy\": results[\"overall_accuracy\"],\n", + " }" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 🏠 Model Architecture\n", + "---" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Implementating `LlamaForTokenClassification`\n", + "\n", + "[Source: @KoichiYasuoka](https://github.com/huggingface/transformers/issues/26521#issuecomment-1868284434)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "%%capture\n", + "from typing import List, Optional, Tuple, Union\n", + "import torch\n", + "from torch import nn\n", + "from transformers.modeling_outputs import TokenClassifierOutput\n", + "from transformers.file_utils import add_start_docstrings_to_model_forward\n", + "from transformers.models.llama.modeling_llama import LlamaModel, LlamaPreTrainedModel, LLAMA_INPUTS_DOCSTRING\n", + "\n", + "class LlamaForTokenClassification(LlamaPreTrainedModel):\n", + " def __init__(self, config):\n", + " super().__init__(config)\n", + " self.num_labels = config.num_labels\n", + " self.model = LlamaModel(config)\n", + " if hasattr(config, \"classifier_dropout\") and config.classifier_dropout is not None:\n", + " classifier_dropout = config.classifier_dropout\n", + " elif hasattr(config, \"hidden_dropout\") and config.hidden_dropout is not None:\n", + " classifier_dropout = config.hidden_dropout\n", + " else:\n", + " classifier_dropout = 0.1\n", + " self.dropout = nn.Dropout(classifier_dropout)\n", + " self.classifier = nn.Linear(config.hidden_size, config.num_labels)\n", + "\n", + " # Initialize weights and apply final processing\n", + " self.post_init()\n", + "\n", + " def get_input_embeddings(self):\n", + " return self.model.embed_tokens\n", + "\n", + " def set_input_embeddings(self, value):\n", + " self.model.embed_tokens = value\n", + "\n", + " @add_start_docstrings_to_model_forward(LLAMA_INPUTS_DOCSTRING)\n", + " def forward(\n", + " self,\n", + " input_ids: Optional[torch.LongTensor] = None,\n", + " attention_mask: Optional[torch.Tensor] = None,\n", + " position_ids: Optional[torch.LongTensor] = None,\n", + " past_key_values: Optional[List[torch.FloatTensor]] = None,\n", + " inputs_embeds: Optional[torch.FloatTensor] = None,\n", + " labels: Optional[torch.LongTensor] = None,\n", + " use_cache: Optional[bool] = None,\n", + " output_attentions: Optional[bool] = None,\n", + " output_hidden_states: Optional[bool] = None,\n", + " return_dict: Optional[bool] = None,\n", + " ) -> Union[Tuple, TokenClassifierOutput]:\n", + "\n", + " return_dict = return_dict if return_dict is not None else self.config.use_return_dict\n", + "\n", + " transformer_outputs = self.model(\n", + " input_ids,\n", + " attention_mask=attention_mask,\n", + " position_ids=position_ids,\n", + " past_key_values=past_key_values,\n", + " inputs_embeds=inputs_embeds,\n", + 
" use_cache=use_cache,\n", + " output_attentions=output_attentions,\n", + " output_hidden_states=output_hidden_states,\n", + " return_dict=return_dict,\n", + " )\n", + "\n", + " hidden_states = transformer_outputs[0]\n", + " hidden_states = self.dropout(hidden_states)\n", + " logits = self.classifier(hidden_states)\n", + "\n", + " loss = None\n", + " if labels is not None:\n", + " labels = labels.to(logits.device)\n", + " loss_fct = nn.CrossEntropyLoss()\n", + " loss = loss_fct(logits.view(-1, self.num_labels), labels.view(-1))\n", + "\n", + " if not return_dict:\n", + " output = (logits,) + transformer_outputs[2:]\n", + " return ((loss,) + output) if loss is not None else output\n", + "\n", + " return TokenClassifierOutput(\n", + " loss=loss,\n", + " logits=logits,\n", + " hidden_states=transformer_outputs.hidden_states,\n", + " attentions=transformer_outputs.attentions\n", + " )\n", + "\n", + "tokenizer = AutoTokenizer.from_pretrained(config.model)\n", + "\n", + "model = LlamaForTokenClassification.from_pretrained(\n", + " config.model,\n", + " num_labels=len(label_list),\n", + " id2label=id2label,\n", + " label2id=label2id,\n", + " cache_dir=\"/cache/\",\n", + ")" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Process Dataset for Token Classification" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "def tokenize_and_align_labels(examples):\n", + " tokenized_inputs = tokenizer(examples[\"tokens\"], is_split_into_words=True, padding='longest', max_length=config.max_length, truncation=True)\n", + "\n", + " labels = []\n", + " for i, label in enumerate(examples[f\"ner_tags\"]):\n", + " word_ids = tokenized_inputs.word_ids(batch_index=i) # Map tokens to their respective word.\n", + " previous_word_idx = None\n", + " label_ids = []\n", + " for word_idx in word_ids: # Set the special tokens to -100.\n", + " if word_idx is None:\n", + " label_ids.append(-100)\n", + " elif word_idx != previous_word_idx: # Only label the first token of a given word.\n", + " label_ids.append(label[word_idx])\n", + " else:\n", + " label_ids.append(-100)\n", + " previous_word_idx = word_idx\n", + " labels.append(label_ids)\n", + "\n", + " tokenized_inputs[\"labels\"] = labels\n", + " return tokenized_inputs\n", + "\n", + "tokenized_ds = ds.map(tokenize_and_align_labels, batched=True)\n", + "data_collator = DataCollatorForTokenClassification(tokenizer=tokenizer)" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "### Obtaining 🤗 PEFT Model" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "peft_config = LoraConfig(\n", + " task_type=TaskType.TOKEN_CLS,\n", + " inference_mode=False,\n", + " r=config.lora_r,\n", + " lora_alpha=config.lora_alpha,\n", + " lora_dropout=config.lora_dropout\n", + ")\n", + "\n", + "model = get_peft_model(model, peft_config)\n", + "model.print_trainable_parameters()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## ✍️ Training\n", + "---" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "training_args = TrainingArguments(\n", + " output_dir=\"unsloth-llama-2-7b-bnb-4bit-conll2003\",\n", + " learning_rate=config.learning_rate,\n", + " gradient_accumulation_steps=2,\n", + " per_device_train_batch_size=config.batch_size,\n", + " per_device_eval_batch_size=config.batch_size,\n", + " 
num_train_epochs=config.num_epochs,\n", + " logging_steps=100,\n", + " weight_decay=0.01,\n", + " evaluation_strategy=\"epoch\",\n", + " save_strategy=\"epoch\",\n", + " report_to=[\"wandb\"],\n", + " optim=\"paged_adamw_8bit\",\n", + " load_best_model_at_end=True,\n", + " push_to_hub=True,\n", + ")\n", + "\n", + "trainer = Trainer(\n", + " model=model,\n", + " args=training_args,\n", + " train_dataset=tokenized_ds[\"train\"],\n", + " eval_dataset=tokenized_ds[\"test\"],\n", + " tokenizer=tokenizer,\n", + " data_collator=data_collator,\n", + " compute_metrics=compute_metrics,\n", + ")\n", + "\n", + "train_results = trainer.train()" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "wandb.config.train_results = train_results\n", + "wandb.finish()" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## 📚 References\n", + "\n", + "* Github: [`4AI/LS-LLaMA`](https://github.com/4AI/LS-LLaMA)\n", + "* [Alpaca + Llama 7b example by `@unslothai`](https://colab.research.google.com/drive/1lBzz5KeZJKXjvivbYvmGarix9Ao6Wxe5?usp=sharing)" + ] + } + ], + "metadata": { + "accelerator": "GPU", + "colab": { + "include_colab_link": true, + "provenance": [], + "toc_visible": true + }, + "kernelspec": { + "display_name": "Python 3", + "name": "python3" + } + }, + "nbformat": 4, + "nbformat_minor": 0 }
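
The setup cell in the patched notebook asks for two Colab secrets, `W&B` and `HF_TOKEN`, but only `wandb.login()` is called and the Hugging Face login needed for `push_to_hub=True` stays implicit. A minimal sketch of wiring both secrets in, assuming the Colab secrets panel and the secret names quoted in the setup text:

```python
# Sketch only, assuming Colab and the secret names from the setup section
# ("W&B" for the wandb API key, "HF_TOKEN" for the Hugging Face token).
from google.colab import userdata
import wandb
import huggingface_hub

wandb.login(key=userdata.get("W&B"))                    # wandb API key from the Colab secret
huggingface_hub.login(token=userdata.get("HF_TOKEN"))   # needed later for push_to_hub=True
```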
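
The dataset cell builds `label2id`/`id2label` as integer-to-integer maps, which is enough here because `compute_metrics` indexes `label_list` directly. For reference, a sketch of the name-keyed mapping that `transformers` token-classification examples usually pass to the model config, built from the same CoNLL-2003 `ClassLabel` feature:

```python
# Sketch: name-based label maps for the model config (CoNLL-2003 tags such as
# "B-PER", "I-ORG"). Same information as the notebook's integer maps, but
# human-readable in the pushed config/model card.
label_list = ds["train"].features["ner_tags"].feature.names
id2label = {i: name for i, name in enumerate(label_list)}
label2id = {name: i for i, name in id2label.items()}
```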
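
The model cell loads the pre-quantized `unsloth/llama-2-7b-bnb-4bit` checkpoint and the `TrainingArguments` use `paged_adamw_8bit`, but the quantization setup itself is never spelled out. A hedged sketch of the explicit alternative, loading a stock Llama-2 checkpoint in 4-bit and preparing it for LoRA; the checkpoint name and `device_map` choice are illustrative assumptions, not part of the patch:

```python
# Sketch: explicit 4-bit loading with bitsandbytes instead of a pre-quantized
# checkpoint. Assumes the notebook's LlamaForTokenClassification class,
# label_list, id2label and label2id are already defined.
import torch
from transformers import BitsAndBytesConfig
from peft import prepare_model_for_kbit_training

bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
)

model = LlamaForTokenClassification.from_pretrained(
    "meta-llama/Llama-2-7b-hf",     # assumption: any Llama-2 base checkpoint
    num_labels=len(label_list),
    id2label=id2label,
    label2id=label2id,
    quantization_config=bnb_config,
    device_map="auto",
)
# Freeze base weights and upcast norm layers: standard prep before get_peft_model.
model = prepare_model_for_kbit_training(model)
```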
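
The notebook trains and logs metrics but never runs the fine-tuned model on a sentence. A small sanity-check sketch, assuming the notebook's `model`, `tokenizer`, and `label_list` are still in scope; the sentence is the first CoNLL-2003 training example:

```python
# Sketch: tag one pre-tokenized sentence and print the prediction for the first
# sub-token of each word (mirroring the -100 alignment used during training).
import torch

words = ["EU", "rejects", "German", "call", "to", "boycott", "British", "lamb", "."]
enc = tokenizer(words, is_split_into_words=True, return_tensors="pt").to(model.device)

with torch.no_grad():
    logits = model(**enc).logits        # shape (1, seq_len, num_labels)

pred_ids = logits.argmax(dim=-1)[0].tolist()
word_ids = enc.word_ids(0)              # None marks special tokens
seen = set()
for tok_idx, w_idx in enumerate(word_ids):
    if w_idx is not None and w_idx not in seen:
        seen.add(w_idx)
        print(f"{words[w_idx]:>10} -> {label_list[pred_ids[tok_idx]]}")
```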