Skip to content

Commit

Permalink
modified: code/training_pipeline/notebooks/POC.ipynb
Browse files Browse the repository at this point in the history
  • Loading branch information
hoangvu971 committed Jul 15, 2024
1 parent ba5f4ea commit 0489717
Showing 1 changed file with 125 additions and 46 deletions.
171 changes: 125 additions & 46 deletions code/training_pipeline/notebooks/POC.ipynb
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,7 @@
},
{
"cell_type": "code",
"execution_count": 8,
"execution_count": 2,
"metadata": {},
"outputs": [
{
Expand All @@ -58,7 +58,7 @@
" 'I-product']"
]
},
"execution_count": 8,
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -70,7 +70,7 @@
},
{
"cell_type": "code",
"execution_count": 26,
"execution_count": 3,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -80,7 +80,7 @@
},
{
"cell_type": "code",
"execution_count": 27,
"execution_count": 4,
"metadata": {},
"outputs": [
{
Expand All @@ -100,7 +100,7 @@
},
{
"cell_type": "code",
"execution_count": 28,
"execution_count": 5,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -126,7 +126,7 @@
},
{
"cell_type": "code",
"execution_count": 29,
"execution_count": 6,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -135,7 +135,7 @@
},
{
"cell_type": "code",
"execution_count": 30,
"execution_count": 7,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -145,7 +145,7 @@
},
{
"cell_type": "code",
"execution_count": 31,
"execution_count": 8,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -155,7 +155,7 @@
},
{
"cell_type": "code",
"execution_count": 32,
"execution_count": 9,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -187,7 +187,7 @@
},
{
"cell_type": "code",
"execution_count": 33,
"execution_count": 10,
"metadata": {},
"outputs": [],
"source": [
Expand Down Expand Up @@ -225,7 +225,7 @@
},
{
"cell_type": "code",
"execution_count": 34,
"execution_count": 11,
"metadata": {},
"outputs": [
{
Expand All @@ -250,13 +250,13 @@
},
{
"cell_type": "code",
"execution_count": 17,
"execution_count": 12,
"metadata": {},
"outputs": [
{
"data": {
"application/vnd.jupyter.widget-view+json": {
"model_id": "26c7497f912c4fcab8b38bc88189c816",
"model_id": "ca68293227464b5590eb76133fcd4c57",
"version_major": 2,
"version_minor": 0
},
Expand All @@ -275,7 +275,7 @@
},
{
"cell_type": "code",
"execution_count": 18,
"execution_count": 13,
"metadata": {},
"outputs": [
{
Expand All @@ -292,7 +292,7 @@
"True"
]
},
"execution_count": 18,
"execution_count": 13,
"metadata": {},
"output_type": "execute_result"
}
Expand All @@ -304,7 +304,7 @@
},
{
"cell_type": "code",
"execution_count": 19,
"execution_count": 14,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -314,23 +314,15 @@
},
{
"cell_type": "code",
"execution_count": 35,
"execution_count": 15,
"metadata": {},
"outputs": [
{
"ename": "ImportError",
"evalue": "Using the `Trainer` with `PyTorch` requires `accelerate>=0.21.0`: Please run `pip install transformers[torch]` or `pip install accelerate -U`",
"output_type": "error",
"traceback": [
"\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
"\u001b[0;31mImportError\u001b[0m Traceback (most recent call last)",
"Cell \u001b[0;32mIn[35], line 3\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mfrom\u001b[39;00m \u001b[38;5;21;01mtransformers\u001b[39;00m \u001b[38;5;28;01mimport\u001b[39;00m TrainingArguments\n\u001b[0;32m----> 3\u001b[0m training_args \u001b[38;5;241m=\u001b[39m \u001b[43mTrainingArguments\u001b[49m\u001b[43m(\u001b[49m\n\u001b[1;32m 4\u001b[0m \u001b[43m \u001b[49m\u001b[43moutput_dir\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mNER_hugg_model\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 5\u001b[0m \u001b[43m \u001b[49m\u001b[43mlearning_rate\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m2e-5\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 6\u001b[0m \u001b[43m \u001b[49m\u001b[43mper_device_train_batch_size\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 7\u001b[0m \u001b[43m \u001b[49m\u001b[43mper_device_eval_batch_size\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m16\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 8\u001b[0m \u001b[43m \u001b[49m\u001b[43mnum_train_epochs\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m2\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 9\u001b[0m \u001b[43m \u001b[49m\u001b[43mweight_decay\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;241;43m0.01\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 10\u001b[0m \u001b[43m \u001b[49m\u001b[43mevaluation_strategy\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mepoch\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 11\u001b[0m \u001b[43m \u001b[49m\u001b[43msave_strategy\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mepoch\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 12\u001b[0m \u001b[43m \u001b[49m\u001b[43mload_best_model_at_end\u001b[49m\u001b[43m \u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[43m \u001b[49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\u001b[43m,\u001b[49m\n\u001b[1;32m 13\u001b[0m \u001b[43m \u001b[49m\u001b[43mreport_to\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[38;5;124;43mwandb\u001b[39;49m\u001b[38;5;124;43m\"\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 14\u001b[0m \u001b[43m \u001b[49m\u001b[43mlogging_steps\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;241;43m1\u001b[39;49m\u001b[43m,\u001b[49m\n\u001b[1;32m 15\u001b[0m \u001b[43m \u001b[49m\u001b[43mpush_to_hub\u001b[49m\u001b[38;5;241;43m=\u001b[39;49m\u001b[38;5;28;43;01mTrue\u001b[39;49;00m\n\u001b[1;32m 16\u001b[0m \u001b[43m)\u001b[49m\n",
"File \u001b[0;32m<string>:129\u001b[0m, in \u001b[0;36m__init__\u001b[0;34m(self, output_dir, overwrite_output_dir, do_train, do_eval, do_predict, eval_strategy, prediction_loss_only, per_device_train_batch_size, per_device_eval_batch_size, per_gpu_train_batch_size, per_gpu_eval_batch_size, gradient_accumulation_steps, eval_accumulation_steps, eval_delay, learning_rate, weight_decay, adam_beta1, adam_beta2, adam_epsilon, max_grad_norm, num_train_epochs, max_steps, lr_scheduler_type, lr_scheduler_kwargs, warmup_ratio, warmup_steps, log_level, log_level_replica, log_on_each_node, logging_dir, logging_strategy, logging_first_step, logging_steps, logging_nan_inf_filter, save_strategy, save_steps, save_total_limit, save_safetensors, save_on_each_node, save_only_model, restore_callback_states_from_checkpoint, no_cuda, use_cpu, use_mps_device, seed, data_seed, jit_mode_eval, use_ipex, bf16, fp16, fp16_opt_level, half_precision_backend, bf16_full_eval, fp16_full_eval, tf32, local_rank, ddp_backend, tpu_num_cores, tpu_metrics_debug, debug, dataloader_drop_last, eval_steps, dataloader_num_workers, dataloader_prefetch_factor, past_index, run_name, disable_tqdm, remove_unused_columns, label_names, load_best_model_at_end, metric_for_best_model, greater_is_better, ignore_data_skip, fsdp, fsdp_min_num_params, fsdp_config, fsdp_transformer_layer_cls_to_wrap, accelerator_config, deepspeed, label_smoothing_factor, optim, optim_args, adafactor, group_by_length, length_column_name, report_to, ddp_find_unused_parameters, ddp_bucket_cap_mb, ddp_broadcast_buffers, dataloader_pin_memory, dataloader_persistent_workers, skip_memory_metrics, use_legacy_prediction_loop, push_to_hub, resume_from_checkpoint, hub_model_id, hub_strategy, hub_token, hub_private_repo, hub_always_push, gradient_checkpointing, gradient_checkpointing_kwargs, include_inputs_for_metrics, eval_do_concat_batches, fp16_backend, evaluation_strategy, push_to_hub_model_id, push_to_hub_organization, push_to_hub_token, mp_parameters, auto_find_batch_size, full_determinism, torchdynamo, ray_scope, ddp_timeout, torch_compile, torch_compile_backend, torch_compile_mode, dispatch_batches, split_batches, include_tokens_per_second, include_num_input_tokens_seen, neftune_noise_alpha, optim_target_modules, batch_eval_metrics, eval_on_start)\u001b[0m\n",
"File \u001b[0;32m~/anaconda3/envs/training-pipeline/lib/python3.10/site-packages/transformers/training_args.py:1693\u001b[0m, in \u001b[0;36mTrainingArguments.__post_init__\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 1691\u001b[0m \u001b[38;5;66;03m# Initialize device before we proceed\u001b[39;00m\n\u001b[1;32m 1692\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mframework \u001b[38;5;241m==\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mpt\u001b[39m\u001b[38;5;124m\"\u001b[39m \u001b[38;5;129;01mand\u001b[39;00m is_torch_available():\n\u001b[0;32m-> 1693\u001b[0m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mdevice\u001b[49m\n\u001b[1;32m 1695\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mtorchdynamo \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 1696\u001b[0m warnings\u001b[38;5;241m.\u001b[39mwarn(\n\u001b[1;32m 1697\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m`torchdynamo` is deprecated and will be removed in version 5 of 🤗 Transformers. Use\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 1698\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124m `torch_compile_backend` instead\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 1699\u001b[0m \u001b[38;5;167;01mFutureWarning\u001b[39;00m,\n\u001b[1;32m 1700\u001b[0m )\n",
"File \u001b[0;32m~/anaconda3/envs/training-pipeline/lib/python3.10/site-packages/transformers/training_args.py:2171\u001b[0m, in \u001b[0;36mTrainingArguments.device\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2167\u001b[0m \u001b[38;5;250m\u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2168\u001b[0m \u001b[38;5;124;03mThe device used by this process.\u001b[39;00m\n\u001b[1;32m 2169\u001b[0m \u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 2170\u001b[0m requires_backends(\u001b[38;5;28mself\u001b[39m, [\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mtorch\u001b[39m\u001b[38;5;124m\"\u001b[39m])\n\u001b[0;32m-> 2171\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43m_setup_devices\u001b[49m\n",
"File \u001b[0;32m~/anaconda3/envs/training-pipeline/lib/python3.10/site-packages/transformers/utils/generic.py:60\u001b[0m, in \u001b[0;36mcached_property.__get__\u001b[0;34m(self, obj, objtype)\u001b[0m\n\u001b[1;32m 58\u001b[0m cached \u001b[38;5;241m=\u001b[39m \u001b[38;5;28mgetattr\u001b[39m(obj, attr, \u001b[38;5;28;01mNone\u001b[39;00m)\n\u001b[1;32m 59\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m cached \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m---> 60\u001b[0m cached \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;43mself\u001b[39;49m\u001b[38;5;241;43m.\u001b[39;49m\u001b[43mfget\u001b[49m\u001b[43m(\u001b[49m\u001b[43mobj\u001b[49m\u001b[43m)\u001b[49m\n\u001b[1;32m 61\u001b[0m \u001b[38;5;28msetattr\u001b[39m(obj, attr, cached)\n\u001b[1;32m 62\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m cached\n",
"File \u001b[0;32m~/anaconda3/envs/training-pipeline/lib/python3.10/site-packages/transformers/training_args.py:2051\u001b[0m, in \u001b[0;36mTrainingArguments._setup_devices\u001b[0;34m(self)\u001b[0m\n\u001b[1;32m 2049\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_sagemaker_mp_enabled():\n\u001b[1;32m 2050\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m is_accelerate_available():\n\u001b[0;32m-> 2051\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mImportError\u001b[39;00m(\n\u001b[1;32m 2052\u001b[0m \u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mUsing the `Trainer` with `PyTorch` requires `accelerate>=\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mACCELERATE_MIN_VERSION\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m`: \u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2053\u001b[0m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mPlease run `pip install transformers[torch]` or `pip install accelerate -U`\u001b[39m\u001b[38;5;124m\"\u001b[39m\n\u001b[1;32m 2054\u001b[0m )\n\u001b[1;32m 2055\u001b[0m \u001b[38;5;66;03m# We delay the init of `PartialState` to the end for clarity\u001b[39;00m\n\u001b[1;32m 2056\u001b[0m accelerator_state_kwargs \u001b[38;5;241m=\u001b[39m {\u001b[38;5;124m\"\u001b[39m\u001b[38;5;124menabled\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mTrue\u001b[39;00m, \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124muse_configured_state\u001b[39m\u001b[38;5;124m\"\u001b[39m: \u001b[38;5;28;01mFalse\u001b[39;00m}\n",
"\u001b[0;31mImportError\u001b[0m: Using the `Trainer` with `PyTorch` requires `accelerate>=0.21.0`: Please run `pip install transformers[torch]` or `pip install accelerate -U`"
"name": "stderr",
"output_type": "stream",
"text": [
"/root/anaconda3/envs/training-pipeline/lib/python3.10/site-packages/transformers/training_args.py:1494: FutureWarning: `evaluation_strategy` is deprecated and will be removed in version 4.46 of 🤗 Transformers. Use `eval_strategy` instead\n",
" warnings.warn(\n"
]
}
],
Expand All @@ -355,7 +347,7 @@
},
{
"cell_type": "code",
"execution_count": 14,
"execution_count": 16,
"metadata": {},
"outputs": [],
"source": [
Expand All @@ -374,9 +366,88 @@
},
{
"cell_type": "code",
"execution_count": 22,
"execution_count": 17,
"metadata": {},
"outputs": [
{
"name": "stderr",
"output_type": "stream",
"text": [
"\u001b[34m\u001b[1mwandb\u001b[0m: \u001b[33mWARNING\u001b[0m The `run_name` is currently set to the same value as `TrainingArguments.output_dir`. If this was not intended, please specify a different run name by setting the `TrainingArguments.run_name` parameter.\n"
]
},
{
"data": {
"text/html": [
"Changes to your `wandb` environment variables will be ignored because your `wandb` session has already started. For more information on how to modify your settings with `wandb.init()` arguments, please refer to <a href='https://wandb.me/wandb-init' target=\"_blank\">the W&B docs</a>."
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Tracking run with wandb version 0.17.4"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Run data is saved locally in <code>/root/NER-project/code/training_pipeline/notebooks/wandb/run-20240715_155632-3hxfkptd</code>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
"Syncing run <strong><a href='https://wandb.ai/thomas24/NER/runs/3hxfkptd' target=\"_blank\">NER_hugg_model</a></strong> to <a href='https://wandb.ai/thomas24/NER' target=\"_blank\">Weights & Biases</a> (<a href='https://wandb.me/run' target=\"_blank\">docs</a>)<br/>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View project at <a href='https://wandb.ai/thomas24/NER' target=\"_blank\">https://wandb.ai/thomas24/NER</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
" View run at <a href='https://wandb.ai/thomas24/NER/runs/3hxfkptd' target=\"_blank\">https://wandb.ai/thomas24/NER/runs/3hxfkptd</a>"
],
"text/plain": [
"<IPython.core.display.HTML object>"
]
},
"metadata": {},
"output_type": "display_data"
},
{
"data": {
"text/html": [
Expand All @@ -401,21 +472,21 @@
" <tbody>\n",
" <tr>\n",
" <td>1</td>\n",
" <td>No log</td>\n",
" <td>0.261921</td>\n",
" <td>0.470937</td>\n",
" <td>0.367933</td>\n",
" <td>0.413111</td>\n",
" <td>0.943525</td>\n",
" <td>0.559600</td>\n",
" <td>0.279779</td>\n",
" <td>0.650113</td>\n",
" <td>0.266914</td>\n",
" <td>0.378449</td>\n",
" <td>0.939079</td>\n",
" </tr>\n",
" <tr>\n",
" <td>2</td>\n",
" <td>No log</td>\n",
" <td>0.281322</td>\n",
" <td>0.531695</td>\n",
" <td>0.380908</td>\n",
" <td>0.443844</td>\n",
" <td>0.946005</td>\n",
" <td>0.043400</td>\n",
" <td>0.268090</td>\n",
" <td>0.615108</td>\n",
" <td>0.316960</td>\n",
" <td>0.418349</td>\n",
" <td>0.941986</td>\n",
" </tr>\n",
" </tbody>\n",
"</table><p>"
Expand All @@ -427,13 +498,21 @@
"metadata": {},
"output_type": "display_data"
},
{
"name": "stderr",
"output_type": "stream",
"text": [
"/root/anaconda3/envs/training-pipeline/lib/python3.10/site-packages/seqeval/metrics/v1.py:57: UndefinedMetricWarning: Precision and F-score are ill-defined and being set to 0.0 in labels with no predicted samples. Use `zero_division` parameter to control this behavior.\n",
" _warn_prf(average, modifier, msg_start, len(result))\n"
]
},
{
"data": {
"text/plain": [
"TrainOutput(global_step=426, training_loss=0.07569359613696175, metrics={'train_runtime': 36.763, 'train_samples_per_second': 184.642, 'train_steps_per_second': 11.588, 'total_flos': 91781128898820.0, 'train_loss': 0.07569359613696175, 'epoch': 2.0})"
"TrainOutput(global_step=426, training_loss=0.20726914455493292, metrics={'train_runtime': 40.7102, 'train_samples_per_second': 166.739, 'train_steps_per_second': 10.464, 'total_flos': 91781128898820.0, 'train_loss': 0.20726914455493292, 'epoch': 2.0})"
]
},
"execution_count": 22,
"execution_count": 17,
"metadata": {},
"output_type": "execute_result"
}
Expand Down

0 comments on commit 0489717

Please sign in to comment.