add import_ckpt script and minor changes
Signed-off-by: HuiyingLi <[email protected]>
HuiyingLi committed Oct 20, 2024
1 parent b28922e commit 883a61d
Showing 2 changed files with 31 additions and 14 deletions.
23 changes: 16 additions & 7 deletions tutorials/llm/nemo2-peft.ipynb
@@ -66,9 +66,7 @@
"source": [
"# Step 0: Go inside docker container\n",
"\n",
"Here is a demo of starting and go inside the container on DGX Cloud. '\n",
"\n",
"Otherwise, you can start and enter the dev container by: #TODO: FIX CONTAINER\n",
"You can start and enter the dev container by: #TODO: FIX CONTAINER\n",
"```\n",
"docker run --gpus device=1 --shm-size=2g --net=host --ulimit memlock=-1 --rm -it -v ${PWD}:/workspace -w /workspace -v ${PWD}/results:/results nvcr.io/nvidia/nemo:dev bash\n",
"\n",
@@ -87,9 +85,7 @@
"$ huggingface-cli login\n",
"```\n",
"\n",
"Once you are logged in, NeMo 2.0 will automatically import the Hugging Face model and start training. There is no need to manully convert to NeMo checkpoint format.\n",
"\n",
"Let's first import needed python modules:"
"Once logged in, you can use the following script to import a Hugging Face model. Based on the provided model configuration (`Llama3-8b` in the example below), the `llm.import_ckpt` API will download the specified model using the \"hf://<huggingface_model_id>\" URL format. It will then convert the model into NeMo 2.0 format and store it at the given `output_path`.\n"
]
},
{
@@ -102,7 +98,17 @@
"from nemo.collections import llm\n",
"from megatron.core.optimizer import OptimizerConfig\n",
"import torch\n",
"import pytorch_lightning as pl"
"import pytorch_lightning as pl\n",
"from pathlib import Path\n",
"\n",
"def llama3_8b() -> pl.LightningModule:\n",
" from transformers import AutoTokenizer\n",
" tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3-8B\")\n",
" return llm.LlamaModel(llm.Llama3Config8B(), tokenizer=tokenizer)\n",
"\n",
"if __name__ == '__main__':\n",
" output_path=\"llama3-8b-nemo2\"\n",
" llm.import_ckpt(model=llama3_8b(), source=\"hf://meta-llama/Meta-Llama-3-8B\",output_path=Path(output_path))\n"
]
},
{
@@ -332,6 +338,7 @@
"from megatron.core.optimizer import OptimizerConfig\n",
"import torch\n",
"import pytorch_lightning as pl\n",
"from pathlib import Path\n",
"\n",
"\n",
"def trainer(devices=1) -> nl.Trainer:\n",
@@ -407,6 +414,8 @@
" )\n",
"\n",
"if __name__ == '__main__':\n",
" output_path=\"llama3-8b-nemo2\"\n",
" llm.import_ckpt(model=llama3_8b(), source=\"hf://meta-llama/Meta-Llama-3-8B\",output_path=Path(output_path))\n",
" llm.finetune(\n",
" model=llama3_8b(),\n",
" data=squad(),\n",
22 changes: 15 additions & 7 deletions tutorials/llm/nemo2-sft.ipynb
@@ -49,9 +49,7 @@
"source": [
"# Step 0: Go inside docker container\n",
"\n",
"Here is a demo of starting and go inside the container on DGX Cloud. '\n",
"\n",
"Otherwise, you can start and enter the dev container by: #TODO: FIX CONTAINER\n",
"You can start and enter the dev container by: #TODO: FIX CONTAINER\n",
"```\n",
"docker run --gpus device=1 --shm-size=2g --net=host --ulimit memlock=-1 --rm -it -v ${PWD}:/workspace -w /workspace -v ${PWD}/results:/results nvcr.io/nvidia/nemo:dev bash\n",
"\n",
@@ -70,9 +68,7 @@
"$ huggingface-cli login\n",
"```\n",
"\n",
"Once you are logged in, NeMo 2.0 will automatically import the Hugging Face model and start training. There is no need to manully convert to NeMo checkpoint format.\n",
"\n",
"Let's first import needed python modules:"
"Once logged in, you can use the following script to import a Hugging Face model. Based on the provided model configuration (`Llama3-8b` in the example below), the `llm.import_ckpt` API will download the specified model using the \"hf://<huggingface_model_id>\" URL format. It will then convert the model into NeMo 2.0 format and store it at the given `output_path`."
]
},
{
@@ -85,7 +81,17 @@
"from nemo.collections import llm\n",
"from megatron.core.optimizer import OptimizerConfig\n",
"import torch\n",
"import pytorch_lightning as pl"
"import pytorch_lightning as pl\n",
"from pathlib import Path\n",
"\n",
"def llama3_8b() -> pl.LightningModule:\n",
" from transformers import AutoTokenizer\n",
" tokenizer = AutoTokenizer.from_pretrained(\"meta-llama/Meta-Llama-3-8B\")\n",
" return llm.LlamaModel(llm.Llama3Config8B(), tokenizer=tokenizer)\n",
"\n",
"if __name__ == '__main__':\n",
" output_path=\"llama3-8b-nemo2\"\n",
" llm.import_ckpt(model=llama3_8b(), source=\"hf://meta-llama/Meta-Llama-3-8B\",output_path=Path(output_path))"
]
},
{
@@ -531,6 +537,8 @@
" )\n",
"\n",
"if __name__ == '__main__':\n",
" output_path=\"llama3-8b-nemo2\"\n",
" llm.import_ckpt(model=llama3_8b(), source=\"hf://meta-llama/Meta-Llama-3-8B\",output_path=Path(output_path))\n",
" llm.finetune(\n",
" model=llama3_8b(),\n",
" data=dolly(),\n",
