From 7233e00adbda79696f4e9ac921192e84278312e3 Mon Sep 17 00:00:00 2001
From: olachinkei
Date: Mon, 22 Apr 2024 15:21:48 +0900
Subject: [PATCH] add automated artifacts path

---
 .../Alpaca_finetunning_with_WandB.ipynb | 30 +++++++------------
 1 file changed, 11 insertions(+), 19 deletions(-)

diff --git a/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb b/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
index 56b9a65c..29eb9d7e 100644
--- a/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
+++ b/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
@@ -9,6 +9,9 @@
     "# From Llama to Alpaca: Finetuning an LLM with Weights & Biases\n",
\n", "In this notebooks you will learn how to finetune a model on an Instruction dataset. We will use an updated version of the Alpaca dataset that, instead of davinci-003 (GPT3) generations uses GPT4 to get an even better instruction dataset!\n", "\n", + "\"Open\n", + "\n", + "\n", "original github: https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM#how-good-is-the-data" ], "id": "3c7c21b5-4457-481f-b2cc-fb20cdcbfbe3" @@ -141,23 +144,12 @@ }, "outputs": [], "source": [ + "os.environ[\"WANDB_ENTITY\"]=\"keisuke-kamata\"\n", + "os.environ[\"WANDB_PROJECT\"]=\"alpaca_finetuning_with_wandb\"\n", "wandb.login()" ], "id": "gqAFsykRoQGh" }, - { - "cell_type": "code", - "execution_count": null, - "metadata": { - "id": "IdolWYdFxSML" - }, - "outputs": [], - "source": [ - "wandb_entity = \"\"\n", - "wandb_project = \"\"" - ], - "id": "IdolWYdFxSML" - }, { "cell_type": "code", "execution_count": null, @@ -167,7 +159,7 @@ "outputs": [], "source": [ "# log to wandb\n", - "with wandb.init(entity=wandb_entity, project=wandb_project):\n", + "with wandb.init():\n", " # log as a table\n", " table = wandb.Table(columns=list(alpaca[0].keys()))\n", " for row in alpaca:\n", @@ -233,7 +225,7 @@ }, "outputs": [], "source": [ - "artifact_path = '' # change here!" + "artifact_path = f'{os.environ[\"WANDB_ENTITY\"]}/{os.environ[\"WANDB_PROJECT\"]}/alpaca_gpt4:latest'" ], "id": "BgJI83G-wKz6" }, @@ -245,7 +237,7 @@ }, "outputs": [], "source": [ - "with wandb.init(entity=wandb_entity, project=wandb_project, job_type=\"split_data\") as run:\n", + "with wandb.init(job_type=\"split_data\") as run:\n", " artifact = run.use_artifact(artifact_path, type='dataset')\n", " #artifact_folder = artifact.download()\n", "\n", @@ -475,7 +467,7 @@ }, "outputs": [], "source": [ - "path_dataset_for_trainig = '' # change here!" + "path_dataset_for_trainig = f'{os.environ[\"WANDB_ENTITY\"]}/{os.environ[\"WANDB_PROJECT\"]}/alpaca_gpt4_splitted:latest'" ], "id": "WoAiDU3c_xYG" }, @@ -487,7 +479,7 @@ }, "outputs": [], "source": [ - "with wandb.init(entity=wandb_entity, project=wandb_project, config=config, job_type=\"training\") as run:\n", + "with wandb.init(config=config, job_type=\"training\") as run:\n", " # track data\n", " run.use_artifact(path_dataset_for_trainig)\n", " # Setup for LoRa\n", @@ -714,7 +706,7 @@ " trainer.train()\n", " run.log_code()\n", "\n", - "sweep_id = wandb.sweep(sweep=sweep_configuration, project=wandb_project)\n", + "sweep_id = wandb.sweep(sweep=sweep_configuration)\n", "wandb.agent(sweep_id, function=train_func, count=20)" ], "id": "-9oEu7S6BfQe"