add automated artifacts path
olachinkei committed Apr 22, 2024
1 parent c5e5dc7 commit 7233e00
Showing 1 changed file with 11 additions and 19 deletions.
30 changes: 11 additions & 19 deletions colabs/llm-finetuning-handson/Alpaca_finetunning_with_WandB.ipynb
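In short, the commit replaces the manually filled wandb_entity / wandb_project placeholders with references derived from the WANDB_ENTITY and WANDB_PROJECT environment variables. A minimal sketch of the resulting pattern, assuming the two variables are already set as the notebook does (the entity value and job_type below are illustrative, not from the commit):

    import os
    import wandb

    # Set once near the top of the notebook; everything downstream reads these.
    os.environ["WANDB_ENTITY"] = "my-entity"                      # illustrative value
    os.environ["WANDB_PROJECT"] = "alpaca_finetuning_with_wandb"

    # Artifact references are built automatically instead of being pasted by hand.
    artifact_path = f'{os.environ["WANDB_ENTITY"]}/{os.environ["WANDB_PROJECT"]}/alpaca_gpt4:latest'

    # wandb.init() picks up entity and project from the environment, so no arguments are needed.
    with wandb.init(job_type="demo") as run:
        artifact = run.use_artifact(artifact_path, type="dataset")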
@@ -9,6 +9,9 @@
"<h1> From Llama to Alpaca: Finetunning and LLM with Weights & Biases </h1>\n",
"In this notebooks you will learn how to finetune a model on an Instruction dataset. We will use an updated version of the Alpaca dataset that, instead of davinci-003 (GPT3) generations uses GPT4 to get an even better instruction dataset!\n",
"\n",
"<a href=\"https://colab.research.google.com/drive/1bprbJ4HAKEg_1AGse6cmK7-xkMauuNoh?usp=sharing\" target=\"_parent\"><img src=\"https://colab.research.google.com/assets/colab-badge.svg\" alt=\"Open In Colab\"/></a>\n",
"<!--- @wandbcode{mmdetection-wandb-colab} -->\n",
"\n",
"original github: https://github.com/Instruction-Tuning-with-GPT-4/GPT-4-LLM#how-good-is-the-data"
],
"id": "3c7c21b5-4457-481f-b2cc-fb20cdcbfbe3"
@@ -141,23 +144,12 @@
},
"outputs": [],
"source": [
"os.environ[\"WANDB_ENTITY\"]=\"keisuke-kamata\"\n",
"os.environ[\"WANDB_PROJECT\"]=\"alpaca_finetuning_with_wandb\"\n",
"wandb.login()"
],
"id": "gqAFsykRoQGh"
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {
"id": "IdolWYdFxSML"
},
"outputs": [],
"source": [
"wandb_entity = \"\"\n",
"wandb_project = \"\""
],
"id": "IdolWYdFxSML"
},
{
"cell_type": "code",
"execution_count": null,
@@ -167,7 +159,7 @@
"outputs": [],
"source": [
"# log to wandb\n",
"with wandb.init(entity=wandb_entity, project=wandb_project):\n",
"with wandb.init():\n",
" # log as a table\n",
" table = wandb.Table(columns=list(alpaca[0].keys()))\n",
" for row in alpaca:\n",
@@ -233,7 +225,7 @@
},
"outputs": [],
"source": [
"artifact_path = '' # change here!"
"artifact_path = f'{os.environ[\"WANDB_ENTITY\"]}/{os.environ[\"WANDB_PROJECT\"]}/alpaca_gpt4:latest'"
],
"id": "BgJI83G-wKz6"
},
@@ -245,7 +237,7 @@
},
"outputs": [],
"source": [
"with wandb.init(entity=wandb_entity, project=wandb_project, job_type=\"split_data\") as run:\n",
"with wandb.init(job_type=\"split_data\") as run:\n",
" artifact = run.use_artifact(artifact_path, type='dataset')\n",
" #artifact_folder = artifact.download()\n",
"\n",
@@ -475,7 +467,7 @@
},
"outputs": [],
"source": [
"path_dataset_for_trainig = '' # change here!"
"path_dataset_for_trainig = f'{os.environ[\"WANDB_ENTITY\"]}/{os.environ[\"WANDB_PROJECT\"]}/alpaca_gpt4_splitted:latest'"
],
"id": "WoAiDU3c_xYG"
},
@@ -487,7 +479,7 @@
},
"outputs": [],
"source": [
"with wandb.init(entity=wandb_entity, project=wandb_project, config=config, job_type=\"training\") as run:\n",
"with wandb.init(config=config, job_type=\"training\") as run:\n",
" # track data\n",
" run.use_artifact(path_dataset_for_trainig)\n",
" # Setup for LoRa\n",
@@ -714,7 +706,7 @@
" trainer.train()\n",
" run.log_code()\n",
"\n",
"sweep_id = wandb.sweep(sweep=sweep_configuration, project=wandb_project)\n",
"sweep_id = wandb.sweep(sweep=sweep_configuration)\n",
"wandb.agent(sweep_id, function=train_func, count=20)"
],
"id": "-9oEu7S6BfQe"
