diff --git a/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb b/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb
index d6ef0c3..4d7f2ed 100644
--- a/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb
+++ b/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb
@@ -16,7 +16,7 @@
},
{
"cell_type": "code",
- "execution_count": 1,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -50,17 +50,9 @@
},
{
"cell_type": "code",
- "execution_count": 2,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "env: CUBLAS_WORKSPACE_CONFIG=:4096:8\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"def set_seed(seed = int):\n",
" '''Sets the seed of the entire notebook so results are the same every time we run.\n",
@@ -82,44 +74,20 @@
},
{
"cell_type": "code",
- "execution_count": 3,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Using custom data configuration deprem-private--intent-v13-2cc636b800f876bf\n",
- "Found cached dataset json (C:/Users/dmg_e/.cache/huggingface/datasets/deprem-private___json/deprem-private--intent-v13-2cc636b800f876bf/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n"
- ]
- },
- {
- "data": {
- "application/vnd.jupyter.widget-view+json": {
- "model_id": "be31ba12c6cf4c7b832836f66144f298",
- "version_major": 2,
- "version_minor": 0
- },
- "text/plain": [
- " 0%| | 0/2 [00:00, ?it/s]"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"intent = datasets.load_dataset(\"deprem-private/intent-v13\")"
]
},
{
"cell_type": "code",
- "execution_count": 4,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"df_train = pd.DataFrame().from_records(list(intent[\"train\"]))\n",
- "# df_val = pd.DataFrame().from_records(list(intent[\"validation\"]))\n",
"df_test = pd.DataFrame().from_records(list(intent[\"test\"]))"
]
},
@@ -128,29 +96,6 @@
"execution_count": null,
"metadata": {},
"outputs": [],
- "source": [
- "# from ast import literal_eval\n",
- "# import itertools\n",
- "\n",
- "# a=df_train.label.apply(lambda x: literal_eval(str(x))).tolist()\n",
- "# st = set(list(itertools.chain.from_iterable(a)))"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": null,
- "metadata": {},
- "outputs": [],
- "source": [
- "# name2ix = {k: v for v,k in enumerate(st)}\n",
- "# ix2name = {v: k for k, v in name2ix.items()}\n"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 15,
- "metadata": {},
- "outputs": [],
"source": [
"ix2name = {0: 'Lojistik',\n",
" 1: 'Elektrik Kaynagi',\n",
@@ -172,149 +117,25 @@
},
{
"cell_type": "code",
- "execution_count": 16,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{0: 'Lojistik',\n",
- " 1: 'Elektrik Kaynagi',\n",
- " 2: 'Arama Ekipmani',\n",
- " 3: 'Cenaze',\n",
- " 4: 'Giysi',\n",
- " 5: 'Enkaz Kaldirma',\n",
- " 6: 'Isinma',\n",
- " 7: 'Barınma',\n",
- " 8: 'Tuvalet',\n",
- " 9: 'Su',\n",
- " 10: 'Yemek',\n",
- " 11: 'Saglik',\n",
- " 12: 'Alakasiz'}"
- ]
- },
- "execution_count": 16,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"ix2name"
]
},
{
"cell_type": "code",
- "execution_count": 17,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/html": [
- "
"
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"token_counts = []\n",
"\n",
@@ -435,7 +222,7 @@
},
{
"cell_type": "code",
- "execution_count": 24,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -466,7 +253,7 @@
},
{
"cell_type": "code",
- "execution_count": 25,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -477,7 +264,7 @@
},
{
"cell_type": "code",
- "execution_count": 26,
+ "execution_count": null,
"metadata": {
"tags": []
},
@@ -494,18 +281,9 @@
},
{
"cell_type": "code",
- "execution_count": 27,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- "(6492, 6)\n",
- "(1620, 6)\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"from iterstrat.ml_stratifiers import MultilabelStratifiedKFold\n",
"\n",
@@ -522,32 +300,9 @@
},
{
"cell_type": "code",
- "execution_count": 28,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "{0: 3.017203135650159,\n",
- " 1: 2.4823691788825464,\n",
- " 2: 1.941736822154725,\n",
- " 3: 6.172646581418988,\n",
- " 4: 1.8759436445637834,\n",
- " 5: 1.0,\n",
- " 6: 1.75011143349181,\n",
- " 7: 1.2730236191357969,\n",
- " 8: 4.849237178079731,\n",
- " 9: 2.4857419672410703,\n",
- " 10: 1.6324480531290084,\n",
- " 11: 2.0033774839735035,\n",
- " 12: 1.3688883733394182}"
- ]
- },
- "execution_count": 28,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"occs = np.sum(mlb_labels[df_train.index],\n",
" axis=0)\n",
@@ -562,7 +317,7 @@
},
{
"cell_type": "code",
- "execution_count": 29,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -589,7 +344,7 @@
},
{
"cell_type": "code",
- "execution_count": 30,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -631,7 +386,7 @@
},
{
"cell_type": "code",
- "execution_count": 31,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -647,7 +402,7 @@
},
{
"cell_type": "code",
- "execution_count": 32,
+ "execution_count": null,
"metadata": {},
"outputs": [],
"source": [
@@ -657,7 +412,7 @@
},
{
"cell_type": "code",
- "execution_count": 33,
+ "execution_count": null,
"metadata": {
"scrolled": true,
"tags": []
@@ -672,23 +427,14 @@
" fp16=True,\n",
" evaluation_strategy = \"epoch\",\n",
" save_strategy = \"no\",\n",
- " #learning_rate=2e-5,\n",
" per_device_train_batch_size=batch_size,\n",
" per_device_eval_batch_size=batch_size*2,\n",
" num_train_epochs=4,\n",
- " #weight_decay=0.01,\n",
" load_best_model_at_end=False,\n",
" metric_for_best_model=\"macro f1\",\n",
- " # eval_steps = step_size,\n",
- " # save_steps = step_size,\n",
- " # logging_steps = step_size,\n",
" seed = 42,\n",
" data_seed = 42,\n",
" dataloader_num_workers = 0,\n",
- " #lr_scheduler_type = 'linear',\n",
- " #warmup_steps=0, # number of warmup steps for learning rate scheduler\n",
- " #weight_decay=0, # strength of weight decay\n",
- " #save_total_limit=1, # limit the total amount of checkpoints. Deletes the older checkpoints.\n",
" full_determinism = True,\n",
" group_by_length = True\n",
" )\n",
@@ -723,425 +469,9 @@
},
{
"cell_type": "code",
- "execution_count": 34,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n",
- "loading configuration file config.json from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\config.json\n",
- "Model config BertConfig {\n",
- " \"_name_or_path\": \"dbmdz/bert-base-turkish-128k-uncased\",\n",
- " \"attention_probs_dropout_prob\": 0.1,\n",
- " \"classifier_dropout\": null,\n",
- " \"hidden_act\": \"gelu\",\n",
- " \"hidden_dropout_prob\": 0.1,\n",
- " \"hidden_size\": 768,\n",
- " \"id2label\": {\n",
- " \"0\": \"Lojistik\",\n",
- " \"1\": \"Elektrik Kaynagi\",\n",
- " \"2\": \"Arama Ekipmani\",\n",
- " \"3\": \"Cenaze\",\n",
- " \"4\": \"Giysi\",\n",
- " \"5\": \"Enkaz Kaldirma\",\n",
- " \"6\": \"Isinma\",\n",
- " \"7\": \"Bar\\u0131nma\",\n",
- " \"8\": \"Tuvalet\",\n",
- " \"9\": \"Su\",\n",
- " \"10\": \"Yemek\",\n",
- " \"11\": \"Saglik\",\n",
- " \"12\": \"Alakasiz\"\n",
- " },\n",
- " \"initializer_range\": 0.02,\n",
- " \"intermediate_size\": 3072,\n",
- " \"label2id\": {\n",
- " \"Alakasiz\": 12,\n",
- " \"Arama Ekipmani\": 2,\n",
- " \"Bar\\u0131nma\": 7,\n",
- " \"Cenaze\": 3,\n",
- " \"Elektrik Kaynagi\": 1,\n",
- " \"Enkaz Kaldirma\": 5,\n",
- " \"Giysi\": 4,\n",
- " \"Isinma\": 6,\n",
- " \"Lojistik\": 0,\n",
- " \"Saglik\": 11,\n",
- " \"Su\": 9,\n",
- " \"Tuvalet\": 8,\n",
- " \"Yemek\": 10\n",
- " },\n",
- " \"layer_norm_eps\": 1e-12,\n",
- " \"max_position_embeddings\": 512,\n",
- " \"model_type\": \"bert\",\n",
- " \"num_attention_heads\": 12,\n",
- " \"num_hidden_layers\": 12,\n",
- " \"pad_token_id\": 0,\n",
- " \"position_embedding_type\": \"absolute\",\n",
- " \"problem_type\": \"multi_label_classification\",\n",
- " \"transformers_version\": \"4.24.0\",\n",
- " \"type_vocab_size\": 2,\n",
- " \"use_cache\": true,\n",
- " \"vocab_size\": 128000\n",
- "}\n",
- "\n",
- "loading weights file pytorch_model.bin from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\pytorch_model.bin\n",
- "Some weights of the model checkpoint at dbmdz/bert-base-turkish-128k-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
- "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-128k-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "Using cuda_amp half precision backend\n",
- "loading configuration file config.json from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\config.json\n",
- "Model config BertConfig {\n",
- " \"_name_or_path\": \"dbmdz/bert-base-turkish-128k-uncased\",\n",
- " \"attention_probs_dropout_prob\": 0.1,\n",
- " \"classifier_dropout\": null,\n",
- " \"hidden_act\": \"gelu\",\n",
- " \"hidden_dropout_prob\": 0.1,\n",
- " \"hidden_size\": 768,\n",
- " \"id2label\": {\n",
- " \"0\": \"Lojistik\",\n",
- " \"1\": \"Elektrik Kaynagi\",\n",
- " \"2\": \"Arama Ekipmani\",\n",
- " \"3\": \"Cenaze\",\n",
- " \"4\": \"Giysi\",\n",
- " \"5\": \"Enkaz Kaldirma\",\n",
- " \"6\": \"Isinma\",\n",
- " \"7\": \"Bar\\u0131nma\",\n",
- " \"8\": \"Tuvalet\",\n",
- " \"9\": \"Su\",\n",
- " \"10\": \"Yemek\",\n",
- " \"11\": \"Saglik\",\n",
- " \"12\": \"Alakasiz\"\n",
- " },\n",
- " \"initializer_range\": 0.02,\n",
- " \"intermediate_size\": 3072,\n",
- " \"label2id\": {\n",
- " \"Alakasiz\": 12,\n",
- " \"Arama Ekipmani\": 2,\n",
- " \"Bar\\u0131nma\": 7,\n",
- " \"Cenaze\": 3,\n",
- " \"Elektrik Kaynagi\": 1,\n",
- " \"Enkaz Kaldirma\": 5,\n",
- " \"Giysi\": 4,\n",
- " \"Isinma\": 6,\n",
- " \"Lojistik\": 0,\n",
- " \"Saglik\": 11,\n",
- " \"Su\": 9,\n",
- " \"Tuvalet\": 8,\n",
- " \"Yemek\": 10\n",
- " },\n",
- " \"layer_norm_eps\": 1e-12,\n",
- " \"max_position_embeddings\": 512,\n",
- " \"model_type\": \"bert\",\n",
- " \"num_attention_heads\": 12,\n",
- " \"num_hidden_layers\": 12,\n",
- " \"pad_token_id\": 0,\n",
- " \"position_embedding_type\": \"absolute\",\n",
- " \"problem_type\": \"multi_label_classification\",\n",
- " \"transformers_version\": \"4.24.0\",\n",
- " \"type_vocab_size\": 2,\n",
- " \"use_cache\": true,\n",
- " \"vocab_size\": 128000\n",
- "}\n",
- "\n",
- "loading weights file pytorch_model.bin from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\pytorch_model.bin\n",
- "Some weights of the model checkpoint at dbmdz/bert-base-turkish-128k-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n",
- "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n",
- "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n",
- "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-128k-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n",
- "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n",
- "C:\\Users\\dmg_e\\anaconda3\\envs\\nlp\\lib\\site-packages\\transformers\\optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n",
- " warnings.warn(\n",
- "***** Running training *****\n",
- " Num examples = 6492\n",
- " Num Epochs = 4\n",
- " Instantaneous batch size per device = 32\n",
- " Total train batch size (w. parallel, distributed & accumulation) = 32\n",
- " Gradient Accumulation steps = 1\n",
- " Total optimization steps = 812\n",
- " Number of trainable parameters = 184355341\n"
- ]
- },
- {
- "data": {
- "text/html": [
- "\n",
- " \n",
- " \n",
- "
\n",
- " [812/812 04:52, Epoch 4/4]\n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " Step | \n",
- " Training Loss | \n",
- " Validation Loss | \n",
- " Micro f1 | \n",
- " Macro f1 | \n",
- "
\n",
- " \n",
- " \n",
- " \n",
- " 49 | \n",
- " 0.012600 | \n",
- " 0.008991 | \n",
- " 0.506440 | \n",
- " 0.302808 | \n",
- "
\n",
- " \n",
- " 98 | \n",
- " 0.007000 | \n",
- " 0.005209 | \n",
- " 0.816014 | \n",
- " 0.629818 | \n",
- "
\n",
- " \n",
- " 147 | \n",
- " 0.005000 | \n",
- " 0.004140 | \n",
- " 0.851512 | \n",
- " 0.737092 | \n",
- "
\n",
- " \n",
- " 196 | \n",
- " 0.004600 | \n",
- " 0.003730 | \n",
- " 0.861579 | \n",
- " 0.764193 | \n",
- "
\n",
- " \n",
- " 245 | \n",
- " 0.003400 | \n",
- " 0.003418 | \n",
- " 0.867908 | \n",
- " 0.740340 | \n",
- "
\n",
- " \n",
- " 294 | \n",
- " 0.003000 | \n",
- " 0.003193 | \n",
- " 0.877217 | \n",
- " 0.777911 | \n",
- "
\n",
- " \n",
- " 343 | \n",
- " 0.003100 | \n",
- " 0.003197 | \n",
- " 0.860201 | \n",
- " 0.749202 | \n",
- "
\n",
- " \n",
- " 392 | \n",
- " 0.003000 | \n",
- " 0.002911 | \n",
- " 0.880481 | \n",
- " 0.779105 | \n",
- "
\n",
- " \n",
- " 441 | \n",
- " 0.002100 | \n",
- " 0.002965 | \n",
- " 0.882129 | \n",
- " 0.814388 | \n",
- "
\n",
- " \n",
- " 490 | \n",
- " 0.001600 | \n",
- " 0.002963 | \n",
- " 0.888193 | \n",
- " 0.820314 | \n",
- "
\n",
- " \n",
- " 539 | \n",
- " 0.002000 | \n",
- " 0.003001 | \n",
- " 0.888388 | \n",
- " 0.828849 | \n",
- "
\n",
- " \n",
- " 588 | \n",
- " 0.001900 | \n",
- " 0.002873 | \n",
- " 0.887235 | \n",
- " 0.830829 | \n",
- "
\n",
- " \n",
- " 637 | \n",
- " 0.001500 | \n",
- " 0.002813 | \n",
- " 0.888235 | \n",
- " 0.837983 | \n",
- "
\n",
- " \n",
- " 686 | \n",
- " 0.001400 | \n",
- " 0.002915 | \n",
- " 0.892704 | \n",
- " 0.833584 | \n",
- "
\n",
- " \n",
- " 735 | \n",
- " 0.001200 | \n",
- " 0.002902 | \n",
- " 0.892688 | \n",
- " 0.857335 | \n",
- "
\n",
- " \n",
- " 784 | \n",
- " 0.001200 | \n",
- " 0.002860 | \n",
- " 0.897036 | \n",
- " 0.864054 | \n",
- "
\n",
- " \n",
- "
"
- ],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- },
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245] due to args.save_total_limit\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245\\pytorch_model.bin\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196] due to args.save_total_limit\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343\\pytorch_model.bin\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294] due to args.save_total_limit\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686\\pytorch_model.bin\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637] due to args.save_total_limit\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686] due to args.save_total_limit\n",
- "***** Running Evaluation *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n",
- "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784\n",
- "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784\\config.json\n",
- "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784\\pytorch_model.bin\n",
- "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735] due to args.save_total_limit\n",
- "\n",
- "\n",
- "Training completed. Do not forget to share your model on huggingface.co/models =)\n",
- "\n",
- "\n",
- "Loading best model from turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784 (score: 0.8640538586818446).\n"
- ]
- },
- {
- "data": {
- "text/plain": [
- "TrainOutput(global_step=812, training_loss=0.003325057638065862, metrics={'train_runtime': 293.4609, 'train_samples_per_second': 88.489, 'train_steps_per_second': 2.767, 'total_flos': 1334598181891200.0, 'train_loss': 0.003325057638065862, 'epoch': 4.0})"
- ]
- },
- "execution_count": 34,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"training_args = TrainingArguments(\n",
" f\"turkish_multilabel_intent_{model_name.split('/')[-1]}\",\n",
@@ -1186,126 +516,27 @@
"cell_type": "code",
"execution_count": null,
"metadata": {
- "scrolled": true,
"tags": []
},
"outputs": [],
- "source": [
- "# training_args = TrainingArguments(\n",
- "# f\"turkish_multilabel_intent_{model_name.split('/')[-1]}\",\n",
- "# fp16=True,\n",
- "# evaluation_strategy = \"steps\",\n",
- "# save_strategy = \"steps\",\n",
- "# learning_rate=best_run[-1]['learning_rate'],\n",
- "# per_device_train_batch_size=32,\n",
- "# per_device_eval_batch_size=batch_size*2,\n",
- "# num_train_epochs=4,\n",
- "# #weight_decay=0.01,\n",
- "# load_best_model_at_end=True,\n",
- "# metric_for_best_model=\"macro f1\",\n",
- "# eval_steps = step_size,\n",
- "# save_steps = step_size,\n",
- "# logging_steps = step_size,\n",
- "# seed = 42,\n",
- "# data_seed = 42,\n",
- "# dataloader_num_workers = 0,\n",
- "# lr_scheduler_type = best_run[-1]['lr_scheduler_type'],\n",
- "# warmup_steps=best_run[-1]['warmup_steps'], # number of warmup steps for learning rate scheduler\n",
- "# weight_decay=best_run[-1]['weight_decay'], # strength of weight decay\n",
- "# save_total_limit=1, # limit the total amount of checkpoints. Deletes the older checkpoints.\n",
- "# full_determinism = True,\n",
- "# group_by_length = True\n",
- "# )\n",
- "\n",
- "# trainer = ImbalancedTrainer(\n",
- "# class_weights=class_weights,\n",
- "# model_init=model_init,\n",
- "# args=training_args,\n",
- "# data_collator=data_collator,\n",
- "# train_dataset=IntentDataset(df_train),\n",
- "# eval_dataset=IntentDataset(df_val),\n",
- "# compute_metrics=compute_metrics,\n",
- "# )\n",
- "\n",
- "# trainer.train()"
- ]
- },
- {
- "cell_type": "code",
- "execution_count": 35,
- "metadata": {
- "tags": []
- },
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "***** Running Prediction *****\n",
- " Num examples = 2028\n",
- " Batch size = 64\n"
- ]
- },
- {
- "data": {
- "text/html": [],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
"source": [
"preds = trainer.predict(IntentDataset(df_test))"
]
},
{
"cell_type": "code",
- "execution_count": 38,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stderr",
- "output_type": "stream",
- "text": [
- "***** Running Prediction *****\n",
- " Num examples = 1620\n",
- " Batch size = 64\n"
- ]
- },
- {
- "data": {
- "text/html": [],
- "text/plain": [
- ""
- ]
- },
- "metadata": {},
- "output_type": "display_data"
- }
- ],
+ "outputs": [],
"source": [
"preds = trainer.predict(IntentDataset(df_val))"
]
},
{
"cell_type": "code",
- "execution_count": 39,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "data": {
- "text/plain": [
- "(0.58, 0.8666219455858628)"
- ]
- },
- "execution_count": 39,
- "metadata": {},
- "output_type": "execute_result"
- }
- ],
+ "outputs": [],
"source": [
"best_thr = -1\n",
"best_score = 0.\n",
@@ -1321,37 +552,9 @@
},
{
"cell_type": "code",
- "execution_count": 40,
+ "execution_count": null,
"metadata": {},
- "outputs": [
- {
- "name": "stdout",
- "output_type": "stream",
- "text": [
- " precision recall f1-score support\n",
- "\n",
- " Lojistik 0.72 0.75 0.74 28\n",
- "Elektrik Kaynagi 0.81 0.80 0.80 49\n",
- " Arama Ekipmani 0.76 0.83 0.79 103\n",
- " Cenaze 1.00 0.67 0.80 3\n",
- " Giysi 0.84 0.96 0.89 114\n",
- " Enkaz Kaldirma 0.95 0.91 0.93 753\n",
- " Isinma 0.84 0.85 0.85 141\n",
- " Barınma 0.96 0.97 0.97 365\n",
- " Tuvalet 1.00 1.00 1.00 7\n",
- " Su 0.85 0.90 0.87 49\n",
- " Yemek 0.91 0.97 0.94 173\n",
- " Saglik 0.78 0.94 0.85 94\n",
- " Alakasiz 0.84 0.83 0.83 294\n",
- "\n",
- " micro avg 0.89 0.90 0.90 2173\n",
- " macro avg 0.87 0.87 0.87 2173\n",
- " weighted avg 0.90 0.90 0.90 2173\n",
- " samples avg 0.90 0.91 0.90 2173\n",
- "\n"
- ]
- }
- ],
+ "outputs": [],
"source": [
"print(classification_report(preds.label_ids.astype(int), (sigmoid(preds.predictions) > 0.53).astype(int), target_names=name2ix.keys(), zero_division=0))"
]