diff --git a/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb b/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb index d6ef0c3..4d7f2ed 100644 --- a/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb +++ b/notebooks/training/MultiLabelIntentClassification_128k_v2_baseline.ipynb @@ -16,7 +16,7 @@ }, { "cell_type": "code", - "execution_count": 1, + "execution_count": null, "metadata": { "tags": [] }, @@ -50,17 +50,9 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "env: CUBLAS_WORKSPACE_CONFIG=:4096:8\n" - ] - } - ], + "outputs": [], "source": [ "def set_seed(seed = int):\n", " '''Sets the seed of the entire notebook so results are the same every time we run.\n", @@ -82,44 +74,20 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using custom data configuration deprem-private--intent-v13-2cc636b800f876bf\n", - "Found cached dataset json (C:/Users/dmg_e/.cache/huggingface/datasets/deprem-private___json/deprem-private--intent-v13-2cc636b800f876bf/0.0.0/0f7e3662623656454fcd2b650f34e886a7db4b9104504885bd462096cc7a9f51)\n" - ] - }, - { - "data": { - "application/vnd.jupyter.widget-view+json": { - "model_id": "be31ba12c6cf4c7b832836f66144f298", - "version_major": 2, - "version_minor": 0 - }, - "text/plain": [ - " 0%| | 0/2 [00:00\n", - "\n", - "\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
image_urllabellabel_confidencelabelerlabel_creation_time
0Kopyala yapıştır yaparak yayar mısınız lütfen ...[Enkaz Kaldirma][0.6666666667]system_consensus1676305452109
1-Antakya-Baba ve down sendromlu çocuğu enkaz a...[Enkaz Kaldirma][1.0]system_consensus1676310480396
2YAKIN ZAMANDA SES‼️Cebrail Mahallesi Fahri Kor...[Enkaz Kaldirma][1.0]system_consensus1676321565920
3@BabalaTv ACİLL YARDIMM ÇAĞRISIDIR!!! TEYİTLİ ...[Giysi, Yemek, Su][0.6666666667, 0.6666666667, 0.6666666667]system_consensus1676241278029
4@alniopulesitivt Bahçelievler mahallesi Trabzo...[Alakasiz][1.0]system_consensus1676321392038
\n", - "" - ], - "text/plain": [ - " image_url label \\\n", - "0 Kopyala yapıştır yaparak yayar mısınız lütfen ... [Enkaz Kaldirma] \n", - "1 -Antakya-Baba ve down sendromlu çocuğu enkaz a... [Enkaz Kaldirma] \n", - "2 YAKIN ZAMANDA SES‼️Cebrail Mahallesi Fahri Kor... [Enkaz Kaldirma] \n", - "3 @BabalaTv ACİLL YARDIMM ÇAĞRISIDIR!!! TEYİTLİ ... [Giysi, Yemek, Su] \n", - "4 @alniopulesitivt Bahçelievler mahallesi Trabzo... [Alakasiz] \n", - "\n", - " label_confidence labeler \\\n", - "0 [0.6666666667] system_consensus \n", - "1 [1.0] system_consensus \n", - "2 [1.0] system_consensus \n", - "3 [0.6666666667, 0.6666666667, 0.6666666667] system_consensus \n", - "4 [1.0] system_consensus \n", - "\n", - " label_creation_time \n", - "0 1676305452109 \n", - "1 1676310480396 \n", - "2 1676321565920 \n", - "3 1676241278029 \n", - "4 1676321392038 " - ] - }, - "execution_count": 17, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_train.head()" ] }, { "cell_type": "code", - "execution_count": 18, + "execution_count": null, "metadata": { "tags": [] }, @@ -324,34 +145,22 @@ "text_col = \"image_url\"\n", "\n", "df_train = df_train[df_train[label_col].notnull()].reset_index(drop=True)\n", - "# df_val = df_val[df_val[label_col].notnull()].reset_index(drop=True)\n", "\n", "df_test = df_test[df_test[label_col].notnull()].reset_index(drop=True)" ] }, { "cell_type": "code", - "execution_count": 19, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "'-Antakya-Baba ve down sendromlu çocuğu enkaz altındaMüberra Güngör: 05322938539ekip var ama paletli kepçe ve ışık bekleniyor müdahale edilemiyorADRES:GAZI MAH. ADNANMENDERES CAD. NO 89 SUiT GÜZELLiK MERKEZI ÜSTÜ Antakya '" - ] - }, - "execution_count": 19, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "df_train.loc[1, \"image_url\"]" ] }, { "cell_type": "code", - "execution_count": 20, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -362,27 +171,16 @@ }, { "cell_type": "code", - "execution_count": 21, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "array([0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0])" - ] - }, - "execution_count": 21, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "mlb_labels[0]" ] }, { "cell_type": "code", - "execution_count": 22, + "execution_count": null, "metadata": { "tags": [] }, @@ -397,20 +195,9 @@ }, { "cell_type": "code", - "execution_count": 23, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAjsAAAGdCAYAAAD0e7I1AAAAOXRFWHRTb2Z0d2FyZQBNYXRwbG90bGliIHZlcnNpb24zLjUuMywgaHR0cHM6Ly9tYXRwbG90bGliLm9yZy/NK7nSAAAACXBIWXMAAA9hAAAPYQGoP6dpAAAq30lEQVR4nO3df3RU9Z3/8ddIfkGaREjqTAYihhpUTNQ0WGq0BZsQloqsh7VU8QfdpT1YJBKBopSuRg4mLrtCtlFx8bCEyrLx+IOu7rFKopgum7rSEWqCFvWYIuCM2bZxAhonkXy+f/jlboeEEEKYmXzyfJxzz2E+953hfT9yzOvcez/3uowxRgAAAJY6J9oNAAAAnE2EHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1eKi3UAs6O7u1kcffaSUlBS5XK5otwMAAPrBGKMjR47I6/XqnHNOfv6GsCPpo48+UlZWVrTbAAAAA3Dw4EGNGzfupPsJO5JSUlIkfTlZqampUe4GAAD0R3t7u7Kyspzf4ydD2JGcS1epqamEHQAAhphT3YLCDcoAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAVouLdgMA0Je8/AL5/YFT1mVmetS0xxeBjgAMNYQdADHN7w+oePUzp6yrv+/GCHQDYCjiMhYAALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYLeph5/Dhw7r11luVnp6uUaNG6YorrpDP53P2G2NUXl4ur9erkSNHatq0adq3b1/Yd4RCIZWWliojI0PJycmaPXu2Dh06FOlDAQAAMSiqYaetrU1XX3214uPj9atf/Upvv/22Hn74YZ177rlOzdq1a7Vu3To98sgj2r17tzwej6ZPn64jR444NWVlZdq+fbtqa2u1a9cuHT16VLNmzdKxY8eicFQAACCWxEXzL/+Hf/gHZWVlafPmzc7YBRdc4PzZGKOqqiqtWrVKc+bMkSRt2bJFbrdb27Zt08KFCxUMBrVp0yY9+eSTKi4uliRt3bpVWVlZqq+v14wZMyJ6TAAAILZE9czO888/r8mTJ+t73/uezjvvPOXn5+uJJ55w9re0tCgQCKikpMQZS0xM1NSpU9XY2ChJ8vl86urqCqvxer3Kzc11ak4UCoXU3t4etgEAADtFNex88MEH2rBhg3JycvTyyy/rjjvu0F133aVf/OIXkqRAICBJcrvdYT/ndrudfYFAQAkJCRo9evRJa05UWVmptLQ0Z8vKyhrsQwMAADEiqmGnu7tbX//611VRUaH8/HwtXLhQP/rRj7Rhw4awOpfLFfbZGNNj7ER91axcuVLBYNDZDh48eGYHAgAAYlZUw05mZqYmTZoUNnbJJZfoww8/lCR5PB5J6nGGprW11Tnb4/F41NnZqba2tpPWnCgxMVGpqalhGwAAsFNUw87VV1+t/fv3h429++67Gj9+vCQpOztbHo9HdXV1zv7Ozk41NDSosLBQklRQUKD4+PiwGr/fr+bmZqcGAAAMX1FdjXX33XersLBQFRUVmjt3rt544w1t3LhRGzdulPTl5auysjJVVFQoJydHOTk5qqio0KhRozRv3jxJUlpamhYsWKBly5YpPT1dY8aM0fLly5WXl+eszgIAAMNXVMPOlVdeqe3bt2vlypVavXq1srOzVVVVpVtuucWpWbFihTo6OrRo0SK1tbVpypQp2rFjh1JSUpya9evXKy4uTnPnzlVHR4eKiopUU1OjESNGROOwAABADHEZY0y0m4i29vZ2paWlKRgMcv8OEGMyPGNVvPqZU9bV33ej/hg4HIGOAMSK/v7+jvrrIgAAAM4mwg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKwWF+0GAAw9efkF8vsDfdZkZnrUtMcXoY4A4OQIOwBOm98fUPHqZ/qsqb/vxgh1AwB94zIWAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKtFNeyUl5fL5XKFbR6Px9lvjFF5ebm8Xq9GjhypadOmad++fWHfEQqFVFpaqoyMDCUnJ2v27Nk6dOhQpA8FGBLy8guU4Rnb55aXXxDtNgFgUMVFu4FLL71U9fX1zucRI0Y4f167dq3WrVunmpoaTZw4UWvWrNH06dO1f/9+paSkSJLKysr0wgsvqLa2Vunp6Vq2bJlmzZoln88X9l0AJL8/oOLVz/RZU3/fjRHqBgAiI+phJy4uLuxsznHGGFVVVWnVqlWaM2eOJGnLli1yu93atm2bFi5cqGAwqE2bNunJJ59UcXGxJGnr1q3KyspSfX29ZsyYEdFjAQAAsSfq9+y899578nq9ys7O1k033aQPPvhAktTS0qJAIKCSkhKnNjExUVOnTlVjY6MkyefzqaurK6zG6/UqNzfXqQEAAMNbVM/sTJkyRb/4xS80ceJEffzxx1qzZo0KCwu1b98+BQIBSZLb7Q77GbfbrQMHDkiSAoGAEhISNHr06B41x3++N6FQSKFQyPnc3t4+WIcEAABiTFTDzsyZM50/5+Xl6aqrrtLXvvY1bdmyRd/85jclSS6XK+xnjDE9xk50qprKyko98MADZ9A5AAAYKqJ+GesvJScnKy8vT++9955zH8+JZ2haW1udsz0ej0ednZ1qa2s7aU1vVq5cqWAw6GwHDx4c5CMBAACxIqbCTigU0jvvvKPMzExlZ2fL4/Gorq7O2d/Z2amGhgYVFhZKkgoKChQfHx9W4/f71dzc7NT0JjExUampqWEbAACwU1QvYy1fvlzXX3+9zj//fLW2tmrNmjVqb2/X/Pnz5XK5VFZWpoqKCuXk5CgnJ0cVFRUaNWqU5s2bJ0lKS0vTggULtGzZMqWnp2vMmDFavny58vLynNVZAABgeItq2Dl06JBuvvlm/fGPf9RXv/pVffOb39Trr7+u8ePHS5JWrFihjo4OLVq0SG1tbZoyZYp27NjhPGNHktavX6+4uDjNnTtXHR0dKioqUk1NDc/YAQAAkqIcdmpra/vc73K5VF5ervLy8pPWJCUlqbq6WtXV1YPcHQAAsEFM3bMDAAAw2Ag7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWi4t2A8Bwl5dfIL8/0GdNZqZHTXt8EeoIAOxC2AGizO8PqHj1M33W1N93Y4S6AQD7cBkLAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqLD0HgL/Ac48A+xB2AOAv8NwjwD5cxgIAAFYj7AAAAKsRdgAAgNViJuxUVlbK5XKprKzMGTPGqLy8XF6vVyNHjtS0adO0b9++sJ8LhUIqLS1VRkaGkpOTNXv2bB06dCjC3QMAgFgVE2Fn9+7d2rhxoy677LKw8bVr12rdunV65JFHtHv3bnk8Hk2fPl1HjhxxasrKyrR9+3bV1tZq165dOnr0qGbNmqVjx45F+jAAAEAMinrYOXr0qG655RY98cQTGj16tDNujFFVVZVWrVqlOXPmKDc3V1u2bNFnn32mbdu2SZKCwaA2bdqkhx9+WMXFxcrPz9fWrVvV1NSk+vr6aB0Shri8/AJleMb2ueXlF0S7TQBAP0V96fmdd96p6667TsXFxVqzZo0z3tLSokAgoJKSEmcsMTFRU6dOVWNjoxYuXCifz6eurq6wGq/Xq9zcXDU2NmrGjBm9/p2hUEihUMj53N7efhaODEMVS48BwC5RDTu1tbV68803tXv37h77AoEvH+rldrvDxt1utw4cOODUJCQkhJ0ROl5z/Od7U1lZqQceeOBM2wcAAENA1C5jHTx4UEuWLNHWrVuVlJR00jqXyxX22RjTY+xEp6pZuXKlgsGgsx08ePD0mgcAAENG1MKOz+dTa2urCgoKFBcXp7i4ODU0NOjnP/+54uLinDM6J56haW1tdfZ5PB51dnaqra3tpDW9SUxMVGpqatgGAADsFLWwU1RUpKamJu3du9fZJk+erFtuuUV79+7VhAkT5PF4VFdX5/xMZ2enGhoaVFhYKEkqKChQfHx8WI3f71dzc7NTAwAAhreo3bOTkpKi3NzcsLHk5GSlp6c742VlZaqoqFBOTo5ycnJUUVGhUaNGad68eZKktLQ0LViwQMuWLVN6errGjBmj5cuXKy8vT8XFxRE/JgAAEHuivhqrLytWrFBHR4cWLVqktrY2TZkyRTt27FBKSopTs379esXFxWnu3Lnq6OhQUVGRampqNGLEiCh2DqA/+vOG8WAwGKFuANgqpsLOa6+9FvbZ5XKpvLxc5eXlJ/2ZpKQkVVdXq7q6+uw2B2DQ9WeZ/9OlRRHqBoCtov5QQQAAgLOJsAMAAKxG2AEAAFaLqXt2AGCggsGgMjxj+6zJzPSoaY8vQh0BiBWEHQBW6O7u5p1mAHrFZSwAAGA1wg4AALAaYQcAAFiNsAMAAKw2oLAzYcIE/elPf+ox/sknn2jChAln3BQAAMBgGVDY+cMf/qBjx471GA+FQjp8+PAZNwUAADBYTmvp+fPPP+/8+eWXX1ZaWprz+dixY3rllVd0wQUXDFpzAAAAZ+q0ws4NN9wg6csXdM6fPz9sX3x8vC644AI9/PDDg9YcAADAmTqtsNPd3S1Jys7O1u7du5WRkXFWmgIAABgsA3qCcktLy2D3AQAAcFYM+HURr7zyil555RW1trY6Z3yO+9d//dczbgwAAGAwDCjsPPDAA1q9erUmT56szMxMuVyuwe4LAABgUAwo7Dz++OOqqanRbbfdNtj9AAAADKoBhZ3Ozk4VFhYOdi8ALBIMBpXhGXvKGgA42wYUdn74wx9q27Zt+vu///vB7geAJbq7u1W8+pk+a54uLYpQNwCGswGFnc8//1wbN25UfX29LrvsMsXHx4ftX7du3aA0BwAAcKYGFHbeeustXXHFFZKk5ubmsH3crAwAAGLJgMLOzp07B7sPAACAs2JALwIFAAAYKgZ0Zufaa6/t83LVq6++OuCGAAAABtOAws7x+3WO6+rq0t69e9Xc3NzjBaEAAADRNKCws379+l7Hy8vLdfTo0TNqCADOFp79AwxPA343Vm9uvfVWfeMb39A//dM/DebXAsCg4Nk/wPA0qDco/+Y3v1FSUtJgfiUAAMAZGdCZnTlz5oR9NsbI7/frt7/9LU9VBoY4LvUAsM2Awk5aWlrY53POOUcXXXSRVq9erZKSkkFpDEB0cKkHgG0GFHY2b9482H0AAACcFWd0g7LP59M777wjl8ulSZMmKT8/f7D6AgAAGBQDCjutra266aab9Nprr+ncc8+VMUbBYFDXXnutamtr9dWvfnWw+wQAABiQAa3GKi0tVXt7u/bt26c///nPamtrU3Nzs9rb23XXXXcNdo8AAAADNqAzOy+99JLq6+t1ySWXOGOTJk3So48+yg3KAAAgpgzozE53d7fi4+N7jMfHx6u7u/uMmwIAABgsAwo73/nOd7RkyRJ99NFHztjhw4d19913q6iIJakAACB2DCjsPPLIIzpy5IguuOACfe1rX9OFF16o7OxsHTlyRNXV1YPdIwAAwIAN6J6drKwsvfnmm6qrq9Pvf/97GWM0adIkFRcXD3Z/AAAAZ+S0ws6rr76qxYsX6/XXX1dqaqqmT5+u6dOnS/ry8fGXXnqpHn/8cX3rW986K80CQ0lefoH8/sAp63j1AgCcXacVdqqqqvSjH/1IqampPfalpaVp4cKFWrduHWEHkOT3B0752gWJVy8AwNl2Wvfs/O53v9Nf/dVfnXR/SUmJfD7fGTcFAAAwWE4r7Hz88ce9Ljk/Li4uTv/7v/97xk0BAAAMltMKO2PHjlVTU9NJ97/11lvKzMzs9/dt2LBBl112mVJTU5WamqqrrrpKv/rVr5z9xhiVl5fL6/Vq5MiRmjZtmvbt2xf2HaFQSKWlpcrIyFBycrJmz56tQ4cOnc5hAVbIyy9Qhmdsnxv3BwEYjk7rnp3vfve7uu+++zRz5kwlJSWF7evo6ND999+vWbNm9fv7xo0bp4ceekgXXnihJGnLli3667/+a+3Zs0eXXnqp1q5dq3Xr1qmmpkYTJ07UmjVrNH36dO3fv18pKSmSpLKyMr3wwguqra1Venq6li1bplmzZsnn82nEiBGnc3jAkNafe4S4PwjAcHRaYednP/uZnnvuOU2cOFGLFy/WRRddJJfLpXfeeUePPvqojh07plWrVvX7+66//vqwzw8++KA2bNig119/XZMmTVJVVZVWrVqlOXPmSPoyDLndbm3btk0LFy5UMBjUpk2b9OSTTzrL3rdu3aqsrCzV19drxowZp3N4AADAQqcVdtxutxobG/XjH/9YK1eulDFGkuRyuTRjxgw99thjcrvdA2rk2LFjevrpp/Xpp5/qqquuUktLiwKBQNi7thITEzV16lQ1NjZq4cKF8vl86urqCqvxer3Kzc1VY2PjScNOKBRSKBRyPre3tw+oZwAAEPtO+6GC48eP14svvqi2tja9//77MsYoJydHo0ePHlADTU1Nuuqqq/T555/rK1/5irZv365JkyapsbFRknqEJ7fbrQMHDkiSAoGAEhISevzdbrdbgcDJn29SWVmpBx54YED9AtEQDAaV4Rl7yhoAQE8DeoKyJI0ePVpXXnnlGTdw0UUXae/evfrkk0/07LPPav78+WpoaHD2u1yusHpjTI+xE52qZuXKlVq6dKnzub29XVlZWQM8AuDs6+7u5n4cABigAb0bazAlJCTowgsv1OTJk1VZWanLL79c//zP/yyPxyNJPc7QtLa2Omd7PB6POjs71dbWdtKa3iQmJjorwI5vAADATlEPOycyxigUCik7O1sej0d1dXXOvs7OTjU0NKiwsFCSVFBQoPj4+LAav9+v5uZmpwYAAAxvA76MNRh++tOfaubMmcrKytKRI0dUW1ur1157TS+99JJcLpfKyspUUVGhnJwc5eTkqKKiQqNGjdK8efMkffmKigULFmjZsmVKT0/XmDFjtHz5cuXl5fFSUgAAICnKYefjjz/WbbfdJr/fr7S0NF122WV66aWXnJeLrlixQh0dHVq0aJHa2to0ZcoU7dixw3nGjiStX79ecXFxmjt3rjo6OlRUVKSamhqesQMAACRFOexs2rSpz/0ul0vl5eUqLy8/aU1SUpKqq6tVXV09yN0BAAAbxNw9OwAAAIOJsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYLWoPmcH6K+8/AL5/Sd/k70kZWZ61LTHF6GOAABDBWEHQ4LfHzjlW7/r77sxQt0AAIYSLmMBAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNpefAAASDQWV4xp6yBgAQfYQdYAC6u7tP+dyfp0uLItQNAKAvXMYCAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1VmMBwGnqz6MHMjM9atrji1BHAPpC2AGA09SfRw/U33djhLoBcCpcxgIAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Xg3FoaVvPwC+f2BPmuCwWCEugEARAJhB8OK3x845Qscny4tilA3AIBI4DIWAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqUQ07lZWVuvLKK5WSkqLzzjtPN9xwg/bv3x9WY4xReXm5vF6vRo4cqWnTpmnfvn1hNaFQSKWlpcrIyFBycrJmz56tQ4cORfJQAABAjIpq2GloaNCdd96p119/XXV1dfriiy9UUlKiTz/91KlZu3at1q1bp0ceeUS7d++Wx+PR9OnTdeTIEaemrKxM27dvV21trXbt2qWjR49q1qxZOnbsWDQOCwAAxJCoPkH5pZdeCvu8efNmnXfeefL5fPr2t78tY4yqqqq0atUqzZkzR5K0ZcsWud1ubdu2TQsXLlQwGNSmTZv05JNPqri4WJK0detWZWVlqb6+XjNmzIj4cQEAgNgRU/fsHH8n0ZgxYyRJLS0tCgQCKikpcWoSExM1depUNTY2SpJ8Pp+6urrCarxer3Jzc52aE4VCIbW3t4dtAADATjETdowxWrp0qa655hrl5uZKkgKBL1/Y6Ha7w2rdbrezLxAIKCEhQaNHjz5pzYkqKyuVlpbmbFlZWYN9OAAAIEbETNhZvHix3nrrLf37v/97j30ulyvsszGmx9iJ+qpZuXKlgsGgsx08eHDgjQMAgJgWE2GntLRUzz//vHbu3Klx48Y54x6PR5J6nKFpbW11zvZ4PB51dnaqra3tpDUnSkxMVGpqatgGAADsFNWwY4zR4sWL9dxzz+nVV19VdnZ22P7s7Gx5PB7V1dU5Y52dnWpoaFBhYaEkqaCgQPHx8WE1fr9fzc3NTg0AABi+oroa684779S2bdv0H//xH0pJSXHO4KSlpWnkyJFyuVwqKytTRUWFcnJylJOTo4qKCo0aNUrz5s1zahcsWKBly5YpPT1dY8aM0fLly5WXl+eszgIAAMNXVMPOhg0bJEnTpk0LG9+8ebN+8IMfSJJWrFihjo4OLVq0SG1tbZoyZYp27NihlJQUp379+vWKi4vT3Llz1dHRoaKiItXU1GjEiBGROhTEgGAwqAzP2FPWAACGl6iGHWPMKWtcLpfKy8tVXl5+0pqkpCRVV1erurp6ELvDUNPd3a3i1c/0WfN0aVGEugEAxIqYuEEZAADgbCHsAAAAq0X1MhYA2Ko/95BlZnrUtMcXoY6A4YuwAwBnQX/uIau/78YIdQMMb1zGAgAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNVZjAUCUsDwdiAzCDgBECcvTgcjgMhYAALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABWI+wAAACrEXYAAIDVCDsAAMBqhB0AAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKvFRbsBIC+/QH5/oM+aYDAYoW4AALYh7CDq/P6Ailc/02fN06VFEeoGAGAbLmMBAACrRTXs/PrXv9b1118vr9crl8ulX/7yl2H7jTEqLy+X1+vVyJEjNW3aNO3bty+sJhQKqbS0VBkZGUpOTtbs2bN16NChCB4FAJw9wWBQGZ6xfW55+QX9+q68/IJB+y5gKInqZaxPP/1Ul19+uf72b/9Wf/M3f9Nj/9q1a7Vu3TrV1NRo4sSJWrNmjaZPn679+/crJSVFklRWVqYXXnhBtbW1Sk9P17JlyzRr1iz5fD6NGDEi0ocEAIOqu7v7lJd56++7sV/f1Z9Lxv39LmAoiWrYmTlzpmbOnNnrPmOMqqqqtGrVKs2ZM0eStGXLFrndbm3btk0LFy5UMBjUpk2b9OSTT6q4uFiStHXrVmVlZam+vl4zZsyI2LEAAIDYFLP37LS0tCgQCKikpMQZS0xM1NSpU9XY2ChJ8vl86urqCqvxer3Kzc11anB29Od0OKfEAQCxIGZXYwUCXy5FdrvdYeNut1sHDhxwahISEjR69OgeNcd/vjehUEihUMj53N7ePlhtDxv9OR0ucUocABB9MXtm5ziXyxX22RjTY+xEp6qprKxUWlqas2VlZQ1KrwAAIPbEbNjxeDyS1OMMTWtrq3O2x+PxqLOzU21tbSet6c3KlSsVDAad7eDBg4PcPQAAiBUxG3ays7Pl8XhUV1fnjHV2dqqhoUGFhYWSpIKCAsXHx4fV+P1+NTc3OzW9SUxMVGpqatgGAADsFNV7do4ePar333/f+dzS0qK9e/dqzJgxOv/881VWVqaKigrl5OQoJydHFRUVGjVqlObNmydJSktL04IFC7Rs2TKlp6drzJgxWr58ufLy8pzVWQAAYHiLatj57W9/q2uvvdb5vHTpUknS/PnzVVNToxUrVqijo0OLFi1SW1ubpkyZoh07djjP2JGk9evXKy4uTnPnzlVHR4eKiopUU1PDM3YAAICkKIedadOmyRhz0v0ul0vl5eUqLy8/aU1SUpKqq6tVXV19FjrEmTr+9NdT1QAAcLbE7NJz2KE/T3/lJZ8AgLMpZm9QBgAAGAyEHQAAYDXCDgAAsBr37ADAENefhQDH64DhiLADAENcfxYCSCwGwPDFZSwAAGA1wg4AALAaYQcAAFiNsAMAAKxG2AEAAFYj7AAAAKsRdgAAgNUIOwAAwGqEHQAAYDXCDgAAsBphBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1eKi3QAAIHYEg0FleMb2WZOZ6VHTHl+EOgLOHGEHPeTlF8jvD/RZEwwGI9QNgEjq7u5W8epn+qypv+/GCHUDDA7CDnrw+wOn/J/d06VFEeoGAIAzwz07AADAaoQdAABgNcIOAACwGmEHAABYjRuUAQCDrj+rOlnCjkgh7AAABl1/VnWyhB2RQtgZZniGDgBguCHsDDM8QwcAMNxwgzIAALAaZ3YAAKelP+/P4nI4YglhBwBwWvrz/iwuhyOWcBkLAABYjbADAACsxmUsAEDM4uGEGAyEHYvwDB0AtuHhhBgMhB2L8AwdAEMJq7oQKYQdAEBUsKoLkcINygAAwGqEHQAAYDUuYw0R3HwMAMDAWBN2HnvsMf3jP/6j/H6/Lr30UlVVVelb3/pWtNsaNNx8DADAwFgRdp566imVlZXpscce09VXX61/+Zd/0cyZM/X222/r/PPPj3Z7p8RZGwAYuP6s6uJZPMObFWFn3bp1WrBggX74wx9KkqqqqvTyyy9rw4YNqqysjHJ3p8ZZGwAYuP6s6np2yfRTBqJPP/1UycnJp/z7CE5Dz5APO52dnfL5fLr33nvDxktKStTY2Njrz4RCIYVCIefz8bMm7e3tZ6/RPnR3d6ur49M+a4wxQ64mFnuKtZpY7Gko1sRiT7FWE4s9RbLm2LFjmrpyS581239yvb67pu/QJEk7H7xtUH5ffPOabysQ+LjPGo/Hrdd3/XpI/V2RdPy/gzGm70IzxB0+fNhIMv/93/8dNv7ggw+aiRMn9voz999/v5HExsbGxsbGZsF28ODBPrPCkD+zc5zL5Qr7bIzpMXbcypUrtXTpUufzJ598ovHjx+vDDz9UWlraWe3TFu3t7crKytLBgweVmpoa7XaGDObt9DFnA8O8nT7mbGCiOW/GGB05ckRer7fPuiEfdjIyMjRixAgFAuE3+La2tsrtdvf6M4mJiUpMTOwxnpaWxj/w05SamsqcDQDzdvqYs4Fh3k4fczYw0Zq3/pykGPIPFUxISFBBQYHq6urCxuvq6lRYWBilrgAAQKwY8md2JGnp0qW67bbbNHnyZF111VXauHGjPvzwQ91xxx3Rbg0AAESZFWHn+9//vv70pz9p9erV8vv9ys3N1Ysvvqjx48f36+cTExN1//3393ppC71jzgaGeTt9zNnAMG+njzkbmKEwby5jTrVeCwAAYOga8vfsAAAA9IWwAwAArEbYAQAAViPsAAAAqw37sPPYY48pOztbSUlJKigo0H/9139Fu6WYUllZqSuvvFIpKSk677zzdMMNN2j//v1hNcYYlZeXy+v1auTIkZo2bZr27dsXpY5jT2VlpVwul8rKypwx5qx3hw8f1q233qr09HSNGjVKV1xxhXy+/3vhIvMW7osvvtDPfvYzZWdna+TIkZowYYJWr16t7u5up4Y5k37961/r+uuvl9frlcvl0i9/+cuw/f2Zo1AopNLSUmVkZCg5OVmzZ8/WoUOHIngUkdXXnHV1demee+5RXl6ekpOT5fV6dfvtt+ujjz4K+46YmrMzfTfVUFZbW2vi4+PNE088Yd5++22zZMkSk5ycbA4cOBDt1mLGjBkzzObNm01zc7PZu3evue6668z5559vjh496tQ89NBDJiUlxTz77LOmqanJfP/73zeZmZmmvb09ip3HhjfeeMNccMEF5rLLLjNLlixxxpmznv785z+b8ePHmx/84Afmf/7nf0xLS4upr68377//vlPDvIVbs2aNSU9PN//5n/9pWlpazNNPP22+8pWvmKqqKqeGOTPmxRdfNKtWrTLPPvuskWS2b98etr8/c3THHXeYsWPHmrq6OvPmm2+aa6+91lx++eXmiy++iPDRREZfc/bJJ5+Y4uJi89RTT5nf//735je/+Y2ZMmWKKSgoCPuOWJqzYR12vvGNb5g77rgjbOziiy829957b5Q6in2tra1GkmloaDDGGNPd3W08Ho956KGHnJrPP//cpKWlmccffzxabcaEI0eOmJycHFNXV2emTp3qhB3mrHf33HOPueaaa066n3nr6brrrjN/93d/FzY2Z84cc+uttxpjmLPenPiLuz9z9Mknn5j4+HhTW1vr1Bw+fNicc8455qWXXopY79HSW0A80RtvvGEkOScLYm3Ohu1lrM7OTvl8PpWUlISNl5SUqLGxMUpdxb5gMChJGjNmjCSppaVFgUAgbB4TExM1derUYT+Pd955p6677joVFxeHjTNnvXv++ec1efJkfe9739N5552n/Px8PfHEE85+5q2na665Rq+88oreffddSdLvfvc77dq1S9/97nclMWf90Z858vl86urqCqvxer3Kzc1lHv+/YDAol8ulc889V1LszZkVT1AeiD/+8Y86duxYj5eFut3uHi8VxZeMMVq6dKmuueYa5ebmSpIzV73N44EDByLeY6yora3Vm2++qd27d/fYx5z17oMPPtCGDRu0dOlS/fSnP9Ubb7yhu+66S4mJibr99tuZt17cc889CgaDuvjiizVixAgdO3ZMDz74oG6++WZJ/Fvrj/7MUSAQUEJCgkaPHt2jht8X0ueff657771X8+bNc14EGmtzNmzDznEulyvsszGmxxi+tHjxYr311lvatWtXj33M4/85ePCglixZoh07digpKemkdcxZuO7ubk2ePFkVFRWSpPz8fO3bt08bNmzQ7bff7tQxb//nqaee0tatW7Vt2zZdeuml2rt3r8rKyuT1ejV//nynjjk7tYHMEfP45c3KN910k7q7u/XYY4+dsj5aczZsL2NlZGRoxIgRPRJma2trj4QPqbS0VM8//7x27typcePGOeMej0eSmMe/4PP51NraqoKCAsXFxSkuLk4NDQ36+c9/rri4OGdemLNwmZmZmjRpUtjYJZdcog8//FAS/9Z685Of/ET33nuvbrrpJuXl5em2227T3XffrcrKSknMWX/0Z448Ho86OzvV1tZ20prhqKurS3PnzlVLS4vq6uqcszpS7M3ZsA07CQkJKigoUF1dXdh4XV2dCgsLo9RV7DHGaPHixXruuef06quvKjs7O2x/dna2PB5P2Dx2dnaqoaFh2M5jUVGRmpqatHfvXmebPHmybrnlFu3du1cTJkxgznpx9dVX93iswbvvvuu80Jd/az199tlnOuec8P+Njxgxwll6zpydWn/mqKCgQPHx8WE1fr9fzc3Nw3Yejwed9957T/X19UpPTw/bH3NzFvFbomPI8aXnmzZtMm+//bYpKyszycnJ5g9/+EO0W4sZP/7xj01aWpp57bXXjN/vd7bPPvvMqXnooYdMWlqaee6550xTU5O5+eabh93S1lP5y9VYxjBnvXnjjTdMXFycefDBB817771n/u3f/s2MGjXKbN261alh3sLNnz/fjB071ll6/txzz5mMjAyzYsUKp4Y5+3Jl5J49e8yePXuMJLNu3TqzZ88eZ+VQf+bojjvuMOPGjTP19fXmzTffNN/5znesXnre15x1dXWZ2bNnm3Hjxpm9e/eG/W4IhULOd8TSnA3rsGOMMY8++qgZP368SUhIMF//+tedJdX4kqRet82bNzs13d3d5v777zcej8ckJiaab3/726apqSl6TcegE8MOc9a7F154weTm5prExERz8cUXm40bN4btZ97Ctbe3myVLlpjzzz/fJCUlmQkTJphVq1aF/cJhzozZuXNnr/8fmz9/vjGmf3PU0dFhFi9ebMaMGWNGjhxpZs2aZT788MMoHE1k9DVnLS0tJ/3dsHPnTuc7YmnOXMYYE7nzSAAAAJE1bO/ZAQAAwwNhBwAAWI2wAwAArEbYAQAAViPsAAAAqxF2AACA1Qg7AADAaoQdAABgNcIOAACwGmEHAABYjbADAACsRtgBAABW+38b+naw1pEinAAAAABJRU5ErkJggg==\n", - "text/plain": [ - "
" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "token_counts = []\n", "\n", @@ -435,7 +222,7 @@ }, { "cell_type": "code", - "execution_count": 24, + "execution_count": null, "metadata": { "tags": [] }, @@ -466,7 +253,7 @@ }, { "cell_type": "code", - "execution_count": 25, + "execution_count": null, "metadata": { "tags": [] }, @@ -477,7 +264,7 @@ }, { "cell_type": "code", - "execution_count": 26, + "execution_count": null, "metadata": { "tags": [] }, @@ -494,18 +281,9 @@ }, { "cell_type": "code", - "execution_count": 27, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "(6492, 6)\n", - "(1620, 6)\n" - ] - } - ], + "outputs": [], "source": [ "from iterstrat.ml_stratifiers import MultilabelStratifiedKFold\n", "\n", @@ -522,32 +300,9 @@ }, { "cell_type": "code", - "execution_count": 28, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "{0: 3.017203135650159,\n", - " 1: 2.4823691788825464,\n", - " 2: 1.941736822154725,\n", - " 3: 6.172646581418988,\n", - " 4: 1.8759436445637834,\n", - " 5: 1.0,\n", - " 6: 1.75011143349181,\n", - " 7: 1.2730236191357969,\n", - " 8: 4.849237178079731,\n", - " 9: 2.4857419672410703,\n", - " 10: 1.6324480531290084,\n", - " 11: 2.0033774839735035,\n", - " 12: 1.3688883733394182}" - ] - }, - "execution_count": 28, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "occs = np.sum(mlb_labels[df_train.index],\n", " axis=0)\n", @@ -562,7 +317,7 @@ }, { "cell_type": "code", - "execution_count": 29, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -589,7 +344,7 @@ }, { "cell_type": "code", - "execution_count": 30, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -631,7 +386,7 @@ }, { "cell_type": "code", - "execution_count": 31, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -647,7 +402,7 @@ }, { "cell_type": "code", - "execution_count": 32, + "execution_count": null, "metadata": {}, "outputs": [], "source": [ @@ -657,7 +412,7 @@ }, { "cell_type": "code", - "execution_count": 33, + "execution_count": null, "metadata": { "scrolled": true, "tags": [] @@ -672,23 +427,14 @@ " fp16=True,\n", " evaluation_strategy = \"epoch\",\n", " save_strategy = \"no\",\n", - " #learning_rate=2e-5,\n", " per_device_train_batch_size=batch_size,\n", " per_device_eval_batch_size=batch_size*2,\n", " num_train_epochs=4,\n", - " #weight_decay=0.01,\n", " load_best_model_at_end=False,\n", " metric_for_best_model=\"macro f1\",\n", - " # eval_steps = step_size,\n", - " # save_steps = step_size,\n", - " # logging_steps = step_size,\n", " seed = 42,\n", " data_seed = 42,\n", " dataloader_num_workers = 0,\n", - " #lr_scheduler_type = 'linear',\n", - " #warmup_steps=0, # number of warmup steps for learning rate scheduler\n", - " #weight_decay=0, # strength of weight decay\n", - " #save_total_limit=1, # limit the total amount of checkpoints. Deletes the older checkpoints.\n", " full_determinism = True,\n", " group_by_length = True\n", " )\n", @@ -723,425 +469,9 @@ }, { "cell_type": "code", - "execution_count": 34, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "Using the `WANDB_DISABLED` environment variable is deprecated and will be removed in v5. Use the --report_to flag to control the integrations used for logging result (for instance --report_to none).\n", - "loading configuration file config.json from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\config.json\n", - "Model config BertConfig {\n", - " \"_name_or_path\": \"dbmdz/bert-base-turkish-128k-uncased\",\n", - " \"attention_probs_dropout_prob\": 0.1,\n", - " \"classifier_dropout\": null,\n", - " \"hidden_act\": \"gelu\",\n", - " \"hidden_dropout_prob\": 0.1,\n", - " \"hidden_size\": 768,\n", - " \"id2label\": {\n", - " \"0\": \"Lojistik\",\n", - " \"1\": \"Elektrik Kaynagi\",\n", - " \"2\": \"Arama Ekipmani\",\n", - " \"3\": \"Cenaze\",\n", - " \"4\": \"Giysi\",\n", - " \"5\": \"Enkaz Kaldirma\",\n", - " \"6\": \"Isinma\",\n", - " \"7\": \"Bar\\u0131nma\",\n", - " \"8\": \"Tuvalet\",\n", - " \"9\": \"Su\",\n", - " \"10\": \"Yemek\",\n", - " \"11\": \"Saglik\",\n", - " \"12\": \"Alakasiz\"\n", - " },\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 3072,\n", - " \"label2id\": {\n", - " \"Alakasiz\": 12,\n", - " \"Arama Ekipmani\": 2,\n", - " \"Bar\\u0131nma\": 7,\n", - " \"Cenaze\": 3,\n", - " \"Elektrik Kaynagi\": 1,\n", - " \"Enkaz Kaldirma\": 5,\n", - " \"Giysi\": 4,\n", - " \"Isinma\": 6,\n", - " \"Lojistik\": 0,\n", - " \"Saglik\": 11,\n", - " \"Su\": 9,\n", - " \"Tuvalet\": 8,\n", - " \"Yemek\": 10\n", - " },\n", - " \"layer_norm_eps\": 1e-12,\n", - " \"max_position_embeddings\": 512,\n", - " \"model_type\": \"bert\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 12,\n", - " \"pad_token_id\": 0,\n", - " \"position_embedding_type\": \"absolute\",\n", - " \"problem_type\": \"multi_label_classification\",\n", - " \"transformers_version\": \"4.24.0\",\n", - " \"type_vocab_size\": 2,\n", - " \"use_cache\": true,\n", - " \"vocab_size\": 128000\n", - "}\n", - "\n", - "loading weights file pytorch_model.bin from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\pytorch_model.bin\n", - "Some weights of the model checkpoint at dbmdz/bert-base-turkish-128k-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n", - "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", - "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-128k-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "Using cuda_amp half precision backend\n", - "loading configuration file config.json from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\config.json\n", - "Model config BertConfig {\n", - " \"_name_or_path\": \"dbmdz/bert-base-turkish-128k-uncased\",\n", - " \"attention_probs_dropout_prob\": 0.1,\n", - " \"classifier_dropout\": null,\n", - " \"hidden_act\": \"gelu\",\n", - " \"hidden_dropout_prob\": 0.1,\n", - " \"hidden_size\": 768,\n", - " \"id2label\": {\n", - " \"0\": \"Lojistik\",\n", - " \"1\": \"Elektrik Kaynagi\",\n", - " \"2\": \"Arama Ekipmani\",\n", - " \"3\": \"Cenaze\",\n", - " \"4\": \"Giysi\",\n", - " \"5\": \"Enkaz Kaldirma\",\n", - " \"6\": \"Isinma\",\n", - " \"7\": \"Bar\\u0131nma\",\n", - " \"8\": \"Tuvalet\",\n", - " \"9\": \"Su\",\n", - " \"10\": \"Yemek\",\n", - " \"11\": \"Saglik\",\n", - " \"12\": \"Alakasiz\"\n", - " },\n", - " \"initializer_range\": 0.02,\n", - " \"intermediate_size\": 3072,\n", - " \"label2id\": {\n", - " \"Alakasiz\": 12,\n", - " \"Arama Ekipmani\": 2,\n", - " \"Bar\\u0131nma\": 7,\n", - " \"Cenaze\": 3,\n", - " \"Elektrik Kaynagi\": 1,\n", - " \"Enkaz Kaldirma\": 5,\n", - " \"Giysi\": 4,\n", - " \"Isinma\": 6,\n", - " \"Lojistik\": 0,\n", - " \"Saglik\": 11,\n", - " \"Su\": 9,\n", - " \"Tuvalet\": 8,\n", - " \"Yemek\": 10\n", - " },\n", - " \"layer_norm_eps\": 1e-12,\n", - " \"max_position_embeddings\": 512,\n", - " \"model_type\": \"bert\",\n", - " \"num_attention_heads\": 12,\n", - " \"num_hidden_layers\": 12,\n", - " \"pad_token_id\": 0,\n", - " \"position_embedding_type\": \"absolute\",\n", - " \"problem_type\": \"multi_label_classification\",\n", - " \"transformers_version\": \"4.24.0\",\n", - " \"type_vocab_size\": 2,\n", - " \"use_cache\": true,\n", - " \"vocab_size\": 128000\n", - "}\n", - "\n", - "loading weights file pytorch_model.bin from cache at C:\\Users\\dmg_e/.cache\\huggingface\\hub\\models--dbmdz--bert-base-turkish-128k-uncased\\snapshots\\f5287aecee60f0c597c11c34341cb92d31c0e71b\\pytorch_model.bin\n", - "Some weights of the model checkpoint at dbmdz/bert-base-turkish-128k-uncased were not used when initializing BertForSequenceClassification: ['cls.seq_relationship.bias', 'cls.seq_relationship.weight', 'cls.predictions.decoder.bias', 'cls.predictions.transform.LayerNorm.bias', 'cls.predictions.bias', 'cls.predictions.transform.LayerNorm.weight', 'cls.predictions.transform.dense.weight', 'cls.predictions.transform.dense.bias', 'cls.predictions.decoder.weight']\n", - "- This IS expected if you are initializing BertForSequenceClassification from the checkpoint of a model trained on another task or with another architecture (e.g. initializing a BertForSequenceClassification model from a BertForPreTraining model).\n", - "- This IS NOT expected if you are initializing BertForSequenceClassification from the checkpoint of a model that you expect to be exactly identical (initializing a BertForSequenceClassification model from a BertForSequenceClassification model).\n", - "Some weights of BertForSequenceClassification were not initialized from the model checkpoint at dbmdz/bert-base-turkish-128k-uncased and are newly initialized: ['classifier.bias', 'classifier.weight']\n", - "You should probably TRAIN this model on a down-stream task to be able to use it for predictions and inference.\n", - "C:\\Users\\dmg_e\\anaconda3\\envs\\nlp\\lib\\site-packages\\transformers\\optimization.py:306: FutureWarning: This implementation of AdamW is deprecated and will be removed in a future version. Use the PyTorch implementation torch.optim.AdamW instead, or set `no_deprecation_warning=True` to disable this warning\n", - " warnings.warn(\n", - "***** Running training *****\n", - " Num examples = 6492\n", - " Num Epochs = 4\n", - " Instantaneous batch size per device = 32\n", - " Total train batch size (w. parallel, distributed & accumulation) = 32\n", - " Gradient Accumulation steps = 1\n", - " Total optimization steps = 812\n", - " Number of trainable parameters = 184355341\n" - ] - }, - { - "data": { - "text/html": [ - "\n", - "
\n", - " \n", - " \n", - " [812/812 04:52, Epoch 4/4]\n", - "
\n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - "
StepTraining LossValidation LossMicro f1Macro f1
490.0126000.0089910.5064400.302808
980.0070000.0052090.8160140.629818
1470.0050000.0041400.8515120.737092
1960.0046000.0037300.8615790.764193
2450.0034000.0034180.8679080.740340
2940.0030000.0031930.8772170.777911
3430.0031000.0031970.8602010.749202
3920.0030000.0029110.8804810.779105
4410.0021000.0029650.8821290.814388
4900.0016000.0029630.8881930.820314
5390.0020000.0030010.8883880.828849
5880.0019000.0028730.8872350.830829
6370.0015000.0028130.8882350.837983
6860.0014000.0029150.8927040.833584
7350.0012000.0029020.8926880.857335
7840.0012000.0028600.8970360.864054

" - ], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - }, - { - "name": "stderr", - "output_type": "stream", - "text": [ - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245] due to args.save_total_limit\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-49] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-98] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-147] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245\\pytorch_model.bin\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-196] due to args.save_total_limit\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-245] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343\\pytorch_model.bin\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-294] due to args.save_total_limit\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-343] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-392] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-441] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-490] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-539] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-588] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686\\pytorch_model.bin\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-637] due to args.save_total_limit\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-686] due to args.save_total_limit\n", - "***** Running Evaluation *****\n", - " Num examples = 1620\n", - " Batch size = 64\n", - "Saving model checkpoint to turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784\n", - "Configuration saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784\\config.json\n", - "Model weights saved in turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784\\pytorch_model.bin\n", - "Deleting older checkpoint [turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-735] due to args.save_total_limit\n", - "\n", - "\n", - "Training completed. Do not forget to share your model on huggingface.co/models =)\n", - "\n", - "\n", - "Loading best model from turkish_multilabel_intent_bert-base-turkish-128k-uncased\\checkpoint-784 (score: 0.8640538586818446).\n" - ] - }, - { - "data": { - "text/plain": [ - "TrainOutput(global_step=812, training_loss=0.003325057638065862, metrics={'train_runtime': 293.4609, 'train_samples_per_second': 88.489, 'train_steps_per_second': 2.767, 'total_flos': 1334598181891200.0, 'train_loss': 0.003325057638065862, 'epoch': 4.0})" - ] - }, - "execution_count": 34, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "training_args = TrainingArguments(\n", " f\"turkish_multilabel_intent_{model_name.split('/')[-1]}\",\n", @@ -1186,126 +516,27 @@ "cell_type": "code", "execution_count": null, "metadata": { - "scrolled": true, "tags": [] }, "outputs": [], - "source": [ - "# training_args = TrainingArguments(\n", - "# f\"turkish_multilabel_intent_{model_name.split('/')[-1]}\",\n", - "# fp16=True,\n", - "# evaluation_strategy = \"steps\",\n", - "# save_strategy = \"steps\",\n", - "# learning_rate=best_run[-1]['learning_rate'],\n", - "# per_device_train_batch_size=32,\n", - "# per_device_eval_batch_size=batch_size*2,\n", - "# num_train_epochs=4,\n", - "# #weight_decay=0.01,\n", - "# load_best_model_at_end=True,\n", - "# metric_for_best_model=\"macro f1\",\n", - "# eval_steps = step_size,\n", - "# save_steps = step_size,\n", - "# logging_steps = step_size,\n", - "# seed = 42,\n", - "# data_seed = 42,\n", - "# dataloader_num_workers = 0,\n", - "# lr_scheduler_type = best_run[-1]['lr_scheduler_type'],\n", - "# warmup_steps=best_run[-1]['warmup_steps'], # number of warmup steps for learning rate scheduler\n", - "# weight_decay=best_run[-1]['weight_decay'], # strength of weight decay\n", - "# save_total_limit=1, # limit the total amount of checkpoints. Deletes the older checkpoints.\n", - "# full_determinism = True,\n", - "# group_by_length = True\n", - "# )\n", - "\n", - "# trainer = ImbalancedTrainer(\n", - "# class_weights=class_weights,\n", - "# model_init=model_init,\n", - "# args=training_args,\n", - "# data_collator=data_collator,\n", - "# train_dataset=IntentDataset(df_train),\n", - "# eval_dataset=IntentDataset(df_val),\n", - "# compute_metrics=compute_metrics,\n", - "# )\n", - "\n", - "# trainer.train()" - ] - }, - { - "cell_type": "code", - "execution_count": 35, - "metadata": { - "tags": [] - }, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "***** Running Prediction *****\n", - " Num examples = 2028\n", - " Batch size = 64\n" - ] - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], "source": [ "preds = trainer.predict(IntentDataset(df_test))" ] }, { "cell_type": "code", - "execution_count": 38, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stderr", - "output_type": "stream", - "text": [ - "***** Running Prediction *****\n", - " Num examples = 1620\n", - " Batch size = 64\n" - ] - }, - { - "data": { - "text/html": [], - "text/plain": [ - "" - ] - }, - "metadata": {}, - "output_type": "display_data" - } - ], + "outputs": [], "source": [ "preds = trainer.predict(IntentDataset(df_val))" ] }, { "cell_type": "code", - "execution_count": 39, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "data": { - "text/plain": [ - "(0.58, 0.8666219455858628)" - ] - }, - "execution_count": 39, - "metadata": {}, - "output_type": "execute_result" - } - ], + "outputs": [], "source": [ "best_thr = -1\n", "best_score = 0.\n", @@ -1321,37 +552,9 @@ }, { "cell_type": "code", - "execution_count": 40, + "execution_count": null, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - " precision recall f1-score support\n", - "\n", - " Lojistik 0.72 0.75 0.74 28\n", - "Elektrik Kaynagi 0.81 0.80 0.80 49\n", - " Arama Ekipmani 0.76 0.83 0.79 103\n", - " Cenaze 1.00 0.67 0.80 3\n", - " Giysi 0.84 0.96 0.89 114\n", - " Enkaz Kaldirma 0.95 0.91 0.93 753\n", - " Isinma 0.84 0.85 0.85 141\n", - " Barınma 0.96 0.97 0.97 365\n", - " Tuvalet 1.00 1.00 1.00 7\n", - " Su 0.85 0.90 0.87 49\n", - " Yemek 0.91 0.97 0.94 173\n", - " Saglik 0.78 0.94 0.85 94\n", - " Alakasiz 0.84 0.83 0.83 294\n", - "\n", - " micro avg 0.89 0.90 0.90 2173\n", - " macro avg 0.87 0.87 0.87 2173\n", - " weighted avg 0.90 0.90 0.90 2173\n", - " samples avg 0.90 0.91 0.90 2173\n", - "\n" - ] - } - ], + "outputs": [], "source": [ "print(classification_report(preds.label_ids.astype(int), (sigmoid(preds.predictions) > 0.53).astype(int), target_names=name2ix.keys(), zero_division=0))" ]