diff --git a/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb b/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
index 56b9a65c..29eb9d7e 100644
--- a/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
+++ b/colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
@@ -9,6 +9,9 @@
"
From Llama to Alpaca: Finetunning and LLM with Weights & Biases
\n",
"In this notebooks you will learn how to finetune a model on an Instruction dataset. We will use an updated version of the Alpaca dataset that, instead of davinci-003 (GPT3) generations uses GPT4 to get an even better instruction dataset!\n",
"\n",
+ "\n",
+ "\n",
+ "\n",
"original github: https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM#how-good-is-the-data"
],
"id": "3c7c21b5-4457-481f-b2cc-fb20cdcbfbe3"
@@ -141,23 +144,12 @@
},
"outputs": [],
"source": [
+ "os.environ[\"WANDB_ENTITY\"]=\"keisuke-kamata\"\n",
+ "os.environ[\"WANDB_PROJECT\"]=\"alpaca_finetuning_with_wandb\"\n",
"wandb.login()"
],
"id": "gqAFsykRoQGh"
},
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {
- "id": "IdolWYdFxSML"
- },
- "outputs": [],
- "source": [
- "wandb_entity = \"\"\n",
- "wandb_project = \"\""
- ],
- "id": "IdolWYdFxSML"
- },
{
"cell_type": "code",
"execution_count": null,
@@ -167,7 +159,7 @@
"outputs": [],
"source": [
"# log to wandb\n",
- "with wandb.init(entity=wandb_entity, project=wandb_project):\n",
+ "with wandb.init():\n",
" # log as a table\n",
" table = wandb.Table(columns=list(alpaca[0].keys()))\n",
" for row in alpaca:\n",
@@ -233,7 +225,7 @@
},
"outputs": [],
"source": [
- "artifact_path = '' # change here!"
+ "artifact_path = f'{os.environ[\"WANDB_ENTITY\"]}/{os.environ[\"WANDB_PROJECT\"]}/alpaca_gpt4:latest'"
],
"id": "BgJI83G-wKz6"
},
@@ -245,7 +237,7 @@
},
"outputs": [],
"source": [
- "with wandb.init(entity=wandb_entity, project=wandb_project, job_type=\"split_data\") as run:\n",
+ "with wandb.init(job_type=\"split_data\") as run:\n",
" artifact = run.use_artifact(artifact_path, type='dataset')\n",
" #artifact_folder = artifact.download()\n",
"\n",
@@ -475,7 +467,7 @@
},
"outputs": [],
"source": [
- "path_dataset_for_trainig = '' # change here!"
+ "path_dataset_for_trainig = f'{os.environ[\"WANDB_ENTITY\"]}/{os.environ[\"WANDB_PROJECT\"]}/alpaca_gpt4_splitted:latest'"
],
"id": "WoAiDU3c_xYG"
},
@@ -487,7 +479,7 @@
},
"outputs": [],
"source": [
- "with wandb.init(entity=wandb_entity, project=wandb_project, config=config, job_type=\"training\") as run:\n",
+ "with wandb.init(config=config, job_type=\"training\") as run:\n",
" # track data\n",
" run.use_artifact(path_dataset_for_trainig)\n",
" # Setup for LoRa\n",
@@ -714,7 +706,7 @@
" trainer.train()\n",
" run.log_code()\n",
"\n",
- "sweep_id = wandb.sweep(sweep=sweep_configuration, project=wandb_project)\n",
+ "sweep_id = wandb.sweep(sweep=sweep_configuration)\n",
"wandb.agent(sweep_id, function=train_func, count=20)"
],
"id": "-9oEu7S6BfQe"