diff --git a/sentence_quality_classifier.ipynb b/sentence_quality_classifier.ipynb index 846803b..8f03f61 100644 --- a/sentence_quality_classifier.ipynb +++ b/sentence_quality_classifier.ipynb @@ -11,18 +11,9 @@ }, { "cell_type": "code", - "execution_count": 22, + "execution_count": 1, "metadata": {}, - "outputs": [ - { - "name": "stdout", - "output_type": "stream", - "text": [ - "The autoreload extension is already loaded. To reload it, use:\n", - " %reload_ext autoreload\n" - ] - } - ], + "outputs": [], "source": [ "%matplotlib inline\n", "%load_ext autoreload\n", @@ -57,10 +48,8 @@ }, { "cell_type": "code", - "execution_count": 23, - "metadata": { - "collapsed": true - }, + "execution_count": 2, + "metadata": {}, "outputs": [], "source": [ "### save config file (don't change)\n", @@ -75,10 +64,8 @@ }, { "cell_type": "code", - "execution_count": 24, - "metadata": { - "collapsed": true - }, + "execution_count": 3, + "metadata": {}, "outputs": [], "source": [ "from tokenizer import get_tokenizer\n", @@ -87,10 +74,8 @@ }, { "cell_type": "code", - "execution_count": 25, - "metadata": { - "collapsed": true - }, + "execution_count": 4, + "metadata": {}, "outputs": [], "source": [ "# load training data\n", @@ -102,26 +87,134 @@ }, { "cell_type": "code", - "execution_count": 39, + "execution_count": 11, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ - "8007\n", "8005\n", - "['SENTENCE', 'LOW_QUALITY', 'TITLE', 'REFERENCE', 'FOREIGN']\n" + "8005\n", + "['LOW_QUALITY', 'SENTENCE', 'REFERENCE', 'FOREIGN', 'TITLE']\n" ] }, { "data": { - "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAFHCAYAAACoKpuzAAAABHNCSVQICAgIfAhkiAAAAAlwSFlz\nAAALEgAACxIB0t1+/AAAADl0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uIDIuMS4wLCBo\ndHRwOi8vbWF0cGxvdGxpYi5vcmcvpW3flQAAIABJREFUeJzt3X2YHXV99/H3x/BkBUzQldIkCmos\nQlsCXYEKd0vhbghoDSpYqGiq1GhvaLW3VcHb+gCmxcu2WFrhMhYErEqpqEQaiylgrSKQABEISLPy\nUJIrkMUgglQq8Ln/mN+yh5N9OPuQM7s7n9d1nWtnvr+ZOd8zyZ7vzsxv5ifbRERE8zyn7gQiIqIe\nKQAREQ2VAhAR0VApABERDZUCEBHRUCkAEREN1XEBkDRL0i2SrizzF0m6R9K68lpY4pJ0rqQ+SbdK\nOqhlG0slbSivpZP/cSIiolM7jGHZdwN3Aru3xN5n+8ttyx0DLCivQ4DzgUMk7QF8BOgFDNwkaaXt\nh8ebfEREjF9HRwCS5gGvAf6hg8WXAJe4cj0wW9JewNHAattby5f+amDxOPOOiIgJ6vQU0KeA9wNP\nt8WXl9M850jaucTmAve3LLOxxIaLR0REDUY9BSTptcAW2zdJOqKl6QzgAWAnYAXwAeDMiSYkaRmw\nDOB5z3ver++7774T3WRERKPcdNNND9nuGW25Tq4BHAa8TtKxwC7A7pL+0fbJpf0JSZ8D/qzMbwLm\nt6w/r8Q2AUe0xb/V/ma2V1AVFHp7e7127doOUoyIiAGS7utkuVFPAdk+w/Y823sDJwLX2D65nNdH\nkoDjgNvLKiuBt5beQIcCj9jeDFwFLJI0R9IcYFGJRUREDcbSC6jdFyT1AALWAe8q8VXAsUAf8Djw\nNgDbWyWdBawpy51pe+sE3j8iIiZAU/lx0DkFFBExdpJust072nK5EzgioqFSACIiGioFICKioVIA\nIiIaKgUgIqKhJtINNCJmgL1P/5e6UwDg3rNfU3cKjZMjgIiIhkoBiIhoqBSAiIiGSgGIiGioFICI\niIZKAYiIaKgUgIiIhkoBiIhoqBSAiIiG6rgASJol6RZJV5b5fSTdIKlP0j9J2qnEdy7zfaV975Zt\nnFHid0k6erI/TEREdG4sRwDvBu5smf8EcI7tlwMPA6eU+CnAwyV+TlkOSftRDSm5P7AYOE/SrIml\nHxER49VRAZA0D3gN8A9lXsCRwJfLIhdTjQsMsKTMU9qPKssvAS61/YTte6iGjDx4Mj5ERESMXadH\nAJ8C3g88XeZfAPzY9pNlfiMwt0zPBe4HKO2PlOWfiQ+xTkREdNmoBUDSa4Ettm/qQj5IWiZpraS1\n/f393XjLiIhG6uQI4DDgdZLuBS6lOvXzt8BsSQOPk54HbCrTm4D5AKX9+cCPWuNDrPMM2yts99ru\n7enpGfMHioiIzoxaAGyfYXue7b2pLuJeY/vNwLXA8WWxpcAVZXplmae0X2PbJX5i6SW0D7AAuHHS\nPklERIzJRAaE+QBwqaSPA7cAF5T4BcDnJfUBW6mKBrbXS7oMuAN4EjjV9lMTeP+IiJiAMRUA298C\nvlWm72aIXjy2fwacMMz6y4HlY00yIiImX+4EjohoqBSAiIiGSgGIiGioFICIiIZKAYiIaKgUgIiI\nhkoBiIhoqBSAiIiGSgGIiGioFICIiIZKAYiIaKgUgIiIhkoBiIhoqBSAiIiGSgGIiGioFICIiIbq\nZFD4XSTdKOn7ktZL+liJXyTpHknrymthiUvSuZL6JN0q6aCWbS2VtKG8lg73nhERsf11MiLYE8CR\nth+TtCPwHUnfKG3vs/3ltuWPoRrvdwFwCHA+cIikPYCPAL2AgZskrbT98GR8kIiIGJtOBoW37cfK\n7I7l5RFWWQJcUta7HpgtaS/gaGC17a3lS381sHhi6UdExHh1dA1A0ixJ64AtVF/iN5Sm5eU0zzmS\ndi6xucD9LatvLLHh4u3vtUzSWklr+/v7x/hxIiKiUx0VANtP2V4IzAMOlvQrwBnAvsCrgD2AD0xG\nQrZX2O613dvT0zMZm4yIiCGMqReQ7R8D1wKLbW8up3meAD4HHFwW2wTMb1ltXokNF4+IiBp00guo\nR9LsMv1c4HeAH5Tz+kgScBxwe1llJfDW0hvoUOAR25uBq4BFkuZImgMsKrGIiKhBJ72A9gIuljSL\nqmBcZvtKSddI6gEErAPeVZZfBRwL9AGPA28DsL1V0lnAmrLcmba3Tt5HiYiIsRi1ANi+FThwiPiR\nwyxv4NRh2i4ELhxjjhERsR3kTuCIiIZKAYiIaKgUgIiIhkoBiIhoqBSAiIiGSgGIiGioFICIiIZK\nAYiIaKgUgIiIhkoBiIhoqBSAiIiGSgGIiGioFICIiIZKAYiIaKgUgIiIhupkRLBdJN0o6fuS1kv6\nWInvI+kGSX2S/knSTiW+c5nvK+17t2zrjBK/S9LR2+tDRUTE6Do5AngCONL2AcBCYHEZ6vETwDm2\nXw48DJxSlj8FeLjEzynLIWk/4ERgf2AxcF4ZZSwiImowagEoA78/VmZ3LC8DRwJfLvGLqcYFBlhS\n5intR5Vxg5cAl9p+wvY9VENGDgwkHxERXdbRNQBJsyStA7YAq4EfAj+2/WRZZCMwt0zPBe4HKO2P\nAC9ojQ+xTut7LZO0VtLa/v7+sX+iiIjoSEcFwPZTthcC86j+at93eyVke4XtXtu9PT092+ttIiIa\nb0y9gGz/GLgW+A1gtqSBQeXnAZvK9CZgPkBpfz7wo9b4EOtERESXddILqEfS7DL9XOB3gDupCsHx\nZbGlwBVlemWZp7RfY9slfmLpJbQPsAC4cbI+SEREjM0Ooy/CXsDFpcfOc4DLbF8p6Q7gUkkfB24B\nLijLXwB8XlIfsJWq5w+210u6DLgDeBI41fZTk/txIiKiU6MWANu3AgcOEb+bIXrx2P4ZcMIw21oO\nLB97mhERMdlyJ3BEREOlAERENFQKQEREQ6UAREQ0VApARERDpQBERDRUCkBEREOlAERENFQKQERE\nQ6UAREQ0VApARERDpQBERDRUCkBEREOlAERENFQKQEREQ3UyIth8SddKukPSeknvLvGPStokaV15\nHduyzhmS+iTdJenolvjiEuuTdPr2+UgREdGJTkYEexJ4r+2bJe0G3CRpdWk7x/ZftS4saT+qUcD2\nB34J+DdJryjNn6YaUnIjsEbSStt3TMYHiYiIselkRLDNwOYy/aikO4G5I6yyBLjU9hPAPWVoyIGR\nw/rKSGJIurQsmwIQEVGDMV0DkLQ31fCQN5TQaZJulXShpDklNhe4v2W1jSU2XLz9PZZJWitpbX9/\n/1jSi4iIMejkFBAAknYFLgfeY/snks4HzgJcfv418PaJJmR7BbACoLe31xPdXgza+/R/qTsFAO49\n+zV1pxARdFgAJO1I9eX/BdtfAbD9YEv7Z4Ery+wmYH7L6vNKjBHiERHRZZ30AhJwAXCn7b9pie/V\nstjrgdvL9ErgREk7S9oHWADcCKwBFkjaR9JOVBeKV07Ox4iIiLHq5AjgMOAtwG2S1pXYB4GTJC2k\nOgV0L/BOANvrJV1GdXH3SeBU208BSDoNuAqYBVxoe/0kfpaIiBiDTnoBfQfQEE2rRlhnObB8iPiq\nkdaLiIjuyZ3AERENlQIQEdFQKQAREQ2VAhAR0VApABERDZUCEBHRUCkAERENlQIQEdFQKQAREQ2V\nAhAR0VApABERDZUCEBHRUCkAERENlQIQEdFQKQAREQ3VyYhg8yVdK+kOSeslvbvE95C0WtKG8nNO\niUvSuZL6yoDxB7Vsa2lZfoOkpdvvY0VExGg6OQJ4Eniv7f2AQ4FTJe0HnA5cbXsBcHWZBziGahjI\nBcAy4HyoCgbwEeAQ4GDgIwNFIyIium/UAmB7s+2by/SjwJ3AXGAJcHFZ7GLguDK9BLjEleuB2WX8\n4KOB1ba32n4YWA0sntRPExERHRvTNQBJewMHAjcAe9reXJoeAPYs03OB+1tW21hiw8UjIqIGHRcA\nSbsClwPvsf2T1jbbphocfsIkLZO0VtLa/v7+ydhkREQMoaMCIGlHqi//L9j+Sgk/WE7tUH5uKfFN\nwPyW1eeV2HDxZ7G9wnav7d6enp6xfJaIiBiDTnoBCbgAuNP237Q0rQQGevIsBa5oib+19AY6FHik\nnCq6ClgkaU65+LuoxCIiogY7dLDMYcBbgNskrSuxDwJnA5dJOgW4D3hTaVsFHAv0AY8DbwOwvVXS\nWcCastyZtrdOyqeIiIgxG7UA2P4OoGGajxpieQOnDrOtC4ELx5JgRERsH7kTOCKioVIAIiIaKgUg\nIqKhUgAiIhoqBSAioqFSACIiGioFICKioVIAIiIaKgUgIqKhUgAiIhoqBSAioqFSACIiGioFICKi\noVIAIiIaKgUgIqKhOhkR7EJJWyTd3hL7qKRNktaV17EtbWdI6pN0l6SjW+KLS6xP0umT/1EiImIs\nOjkCuAhYPET8HNsLy2sVgKT9gBOB/cs650maJWkW8GngGGA/4KSybERE1KSTEcG+LWnvDre3BLjU\n9hPAPZL6gINLW5/tuwEkXVqWvWPMGUdExKSYyDWA0yTdWk4RzSmxucD9LctsLLHh4hERUZPxFoDz\ngZcBC4HNwF9PVkKSlklaK2ltf3//ZG02IiLajKsA2H7Q9lO2nwY+y+Bpnk3A/JZF55XYcPGhtr3C\ndq/t3p6envGkFxERHRhXAZC0V8vs64GBHkIrgRMl7SxpH2ABcCOwBlggaR9JO1FdKF45/rQjImKi\nRr0ILOlLwBHACyVtBD4CHCFpIWDgXuCdALbXS7qM6uLuk8Cptp8q2zkNuAqYBVxoe/2kf5qIiOhY\nJ72AThoifMEIyy8Hlg8RXwWsGlN2ERGx3eRO4IiIhkoBiIhoqBSAiIiGSgGIiGioFICIiIZKAYiI\naKgUgIiIhkoBiIhoqBSAiIiGSgGIiGioFICIiIZKAYiIaKgUgIiIhkoBiIhoqBSAiIiGSgGIiGio\nUQuApAslbZF0e0tsD0mrJW0oP+eUuCSdK6lP0q2SDmpZZ2lZfoOkpdvn40RERKc6OQK4CFjcFjsd\nuNr2AuDqMg9wDNU4wAuAZcD5UBUMqqEkD6EaQP4jA0UjIiLqMWoBsP1tYGtbeAlwcZm+GDiuJX6J\nK9cDs8sA8kcDq21vtf0wsJpti0pERHTReK8B7Gl7c5l+ANizTM8F7m9ZbmOJDRffhqRlktZKWtvf\n3z/O9CIiYjQTvghs24AnIZeB7a2w3Wu7t6enZ7I2GxERbcZbAB4sp3YoP7eU+CZgfsty80psuHhE\nRNRkvAVgJTDQk2cpcEVL/K2lN9ChwCPlVNFVwCJJc8rF30UlFhERNdlhtAUkfQk4AnihpI1UvXnO\nBi6TdApwH/Cmsvgq4FigD3gceBuA7a2SzgLWlOXOtN1+YTkiIrpo1AJg+6Rhmo4aYlkDpw6znQuB\nC8eUXUREbDe5EzgioqFSACIiGioFICKioUa9BhAxE+19+r/UnQIA9579mrpTiAbLEUBEREOlAERE\nNFQKQEREQ6UAREQ0VApARERDpQBERDRUCkBEREOlAERENFQKQEREQ6UAREQ0VApARERDTagASLpX\n0m2S1klaW2J7SFotaUP5OafEJelcSX2SbpV00GR8gIiIGJ/JOAL4bdsLbfeW+dOBq20vAK4u8wDH\nAAvKaxlw/iS8d0REjNP2OAW0BLi4TF8MHNcSv8SV64HZAwPLR0RE9020ABj4pqSbJC0rsT3LQPAA\nDwB7lum5wP0t624ssWeRtEzSWklr+/v7J5heREQMZ6LjARxue5OkFwGrJf2gtdG2JXksG7S9AlgB\n0NvbO6Z1IyKicxM6ArC9qfzcAnwVOBh4cODUTvm5pSy+CZjfsvq8EouIiBqMuwBIep6k3QamgUXA\n7cBKYGlZbClwRZleCby19AY6FHik5VRRRER02UROAe0JfFXSwHa+aPtfJa0BLpN0CnAf8Kay/Crg\nWKAPeBx42wTeOyIiJmjcBcD23cABQ8R/BBw1RNzAqeN9v4iI7a1pY0XnTuCIiIZKAYiIaKgUgIiI\nhkoBiIhoqBSAiIiGSgGIiGioFICIiIZKAYiIaKgUgIiIhkoBiIhoqBSAiIiGSgGIiGioFICIiIZK\nAYiIaKgUgIiIhup6AZC0WNJdkvoknd7t94+IiEpXC4CkWcCngWOA/YCTJO3XzRwiIqIykSEhx+Ng\noK+MJoakS4ElwB3b6w2bNsJPRESnVI3U2KU3k44HFtv+wzL/FuAQ26e1LLMMWFZmfxm4q2sJDu+F\nwEN1JzFFZF8Myr4YlH0xaCrsi5fY7hltoW4fAYzK9gpgRd15tJK01nZv3XlMBdkXg7IvBmVfDJpO\n+6LbF4E3AfNb5ueVWEREdFm3C8AaYIGkfSTtBJwIrOxyDhERQZdPAdl+UtJpwFXALOBC2+u7mcM4\nTalTUjXLvhiUfTEo+2LQtNkXXb0IHBERU0fuBI6IaKgUgIiIhkoBiIhoqBSAGJakQ+vOIWI6kjTl\n7rEaSgpAjOQ8SZ+RNLvuROomad+W6Z3b2hpTKLMfBkn6Tsv059uab+xyOuOSAtBG0rWSrhnmdXXd\n+XVZL3AncGN5bEeTfbFl+nttbed1M5GaZT8Mel7L9P5tbepmIuM1LQ5TuuzPhogdCrwf2NLlXGpl\n+2ngU5K+CXxP0nmAqf5z2/butSbYXRpmeqj5mSz7YdBIfeinRf/6FIA2tm8amJb0W8CfA7sA77L9\njdoSq4mkU4DTgf8HfNrNvXHEw0wPNT+TZT8Mmi3p9VRnUmZLekOJC3h+fWl1LgVgCJKOBj4EPAEs\nt31tzSnVQtJ1wL3A/7L9QM3p1G2epHOpfrkHpinzc+tLq+uyHwb9O/C6lunfbWn7dvfTGbvcCdxG\n0hqgB/gk257jxPbNXU+qJpL+xPa5oy8580laOlK77Yu7lUudsh86I+mNti+vO4/RpAC0kfQtBg9l\nB853D7DtI7ueVE0k3Wz7oLrzmAok7QLsZru/Ld4DPGr7Z/Vk1l3ZD52R9F+2X1x3HqPJKaA2to+o\nO4eYks4F/hX4Slv8cGAR8Eddz6ge2Q+dmRYXxHME0EbS7sCetjeU+ROA55bmq2w/WFtyXSbpSeDx\noZpoWC8gSTfZ/vVh2tbbbu8GOCNlP3QmRwDT118B1wEbyvxfAt+gKgKvBt5VU151uM32gXUnMUX8\nwghtTbqfJvuhkHQbQ/d8ErBnl9MZlxSAbb0KeGfL/KO2/xiefedfNM4WSQfbftYdnpJeBfQPs85M\nlP0w6LV1JzBRKQDb2qGtr3vrHbBNeyTCP9edwBTyPuAySRcBA/eK9AJvpRrZrimyHwZ91vaiupOY\niBSAbT0t6RcH+r3bvh1A0lzg6Voz6769Wvp5b8P2n3QzmTrZvlHSIcD/Af6ghNcDh9huzB3i2Q/P\n0lN3AhOVi8BtJJ0MvBt4L3BLCR9EdW3gXNvtD32asUbp823bl3QtmYgpRtLdDP3oGABst/eUmnJy\nBNDG9j9Kegj4OIMPeLod+HDTHgUx0k09kv6qm7nUTdK1DP+oA9s+qpv51CX74VmeT3UdYKgun2bb\nrrJTTo4AYlymSze3ySJpqK6Pzzwk0ParupxSLbIfBs2EGyVzBNBG0t8xwkOtmnTeexTT4kaXyZKH\nBFayH55l2v8OpABsa23dCUwVkvYYrokZ8J9/rPKQwEr2wzNOrjuBicopoDGQtIPtJ+vOo1sk3cO2\nz0N6hu19uptRffKQwEr2wyBJPwWeGqqJaXKnfApAG0nfsX14mf687be0tE37c34xPm0PCWzXmIcE\nZj8MknTLdL9TPqeAtjXth3nbniS9DPh94MQmPfclDwmsZD88y7T/67lRz+7o0LQf5m2ySfolSX9a\nDv/XU/2/adRdn5Le3zJ9QlvbX3Q/o3pkPzzLiyT93+FedSfXiRSAbc2W9HpJbyzTbyivNzJNhnmb\nLJKWlX7f3wJeAJwCbLb9Mdu31Zpc97UWvDPa2hZ3M5GaZT8MmgXsCuw2zGvKyymgbU37Yd4m0d9T\nXej7fdtrASQ18iiIDIY+IPth0GbbZ9adxESkALSx/ba6c5hC9gJOAP5a0i8ClwE71ptSbTIYeiX7\nYdC0L3jpBTQESbOAObYfKvM7UT346k9tv7LO3OoiaR7we8BJVBfKv2r7g/Vm1T2SngJ+SvVL/1wG\nB8oRsIvtRhTG7IdBkvawvbXuPCYiBaCNpBOBz1D9J98ALAcuBNYAZzWpn/NwJL2CqhfQtD78jbGT\ntKPtn9edR0yOFIA2km4HjrPdJ+kgqnPgx9v+es2pdZ2kN7SFDDwErLP9aA0pTTmSZgOn2l5edy7d\nkHthZpZcA9jW/9jug+quRkkbmvjlX/zuELE9gF+TdIrta7qdUF0kzad67s0vAV8DvgScSTUQyhdr\nTK3bpv157xiUArCtF7X14Z3dOm/7b2rIqRbDXRCX9BKqC8KHdDejWl1C1SvscqrujmuBdcCvDgwe\n1BA9I/Vxb9Lvx0yQArCtz/LsPrzt841n+z5JjbnYV+xh+6Nl+qpyE9SbbTdtlLiBvu85EpgBUgDa\n2P5Y3TlMdZJ+mepJkI0iaQ6DX3w/Ap4vSQDTvTfIGEz7vu8xKAVgCJKOobrLcb8SWg98wvaq+rLq\nPklfZ9u+3XtQ3R8w7R+FO0bPpxoEvfUv34EeYQZe2vWM6pG//GeQ9AJqI+kdwDupRjgaGBugFzgb\n+AfbK+rKrdvKgB+tTPWX7wbb/1NDSlEzSXNsP1x3HjE5UgDaSLoDOLz9kF7SC4DvNPFGMEkLgIHP\nfbPtjXXmUwdJJ9v+xzJ9mO3vtrSdZvvv68uueyQ9yuBR4cDRgKnOJuxkO2cVppE8DG5bGup8ru0f\n1ZFMnSTNlvQ14CqqO6H/APh3SZ9RpUkP/2rt+fJ3bW1v72YidbK9m+3dy2s3qtOBy4EHgL+tN7sY\nq1Trbf1E0gG2v98alHQA0LSbn/6OqqvjGwZ6u5SLnh8Cvg68oryaIA9Ba1FugHsPg/dBvKqJfyRN\ndykA23ovsFLS56gu+kF1DWApzbvweWjriGhQDfkEnCVpC3BYPWnVIg9BAyS9kOp35PeoHpFyoO1H\n6s0qxivXAIYgaU/gVAZHBLsD+HTDbvih3AW9YJi2Ptsv73ZOdZH0ONBH9df+y8o0Zf6ltp833Loz\nSRkHtx/4HEMcEedGsOklRwBtJL3Y9n8BH647lyngOkkfpnoI3jN/KUj6EHBdfWnVonEX/4fxSQaP\neHKD5DSXI4A2rQ+7knS57TfWnVNdJO0OXAAcRHUtAGAhcAvwdts/qSu3bpN0qO3r684jYjKlF9C2\nWi/oNeXmniHZ/ontE4BFwEXltcj28a1f/pKaMDj8eQMTkr5XZyJ1knRZy/Qn2tq+2f2MYiJSALY1\n0sW+RrL9Q9tfL68fDrHI57ueVPe1/mGwS21Z1K/1mtDvtLX1dDORmLhcA9jWAZJ+QhnxqExT5m17\n9/pSm7Ka0A3yOeVZQM9pmX7mczfoWUAj/VGUP5immRSANrZn1Z3DNNSEX/z2ZwG1jgzXpGcB/YKk\nA6kK4XPLoEkwOERkTCO5CBwTllGimkPStVQFr/UxEM802/7t7mcV45VrADEZZvyD4SSd3DJ9WFvb\nad3PqDYfoBoH4bfLl/3FwGPA7UBje8xNVzkCiGFJugL4bnmtafITQNu6Bz/riKdJR0CSbgb+t+2t\nkn4TuBT4Y6ruwa+0fXytCcaY5BpAjOSzwKupHvZ1gKQ7qW4A+y5wne0H60yuy/IsoMqslgvevwes\nsH05cLmkdSOsF1NQCkAMy/aVwJUAkmYBBwJHUN0Nug/V8IBNkWcBVWZJ2sH2k8BRwLKWtnyfTDP5\nB4sRlYd/vbq8DqXqA/9vQNNuhtpX0q2UZwGVacp8U3oAAXyJ6pHgDwH/DfwHgKSXA3ko3DSTawAx\nLEkbqH6pLweup7oO8Fi9WdVD0ktGard9X7dyqZukQ6nGAfim7Z+W2CuAXW3fPOLKMaWkAMSwJJ1B\n9Vf/XOA/qf7q/x5wi+2n6sxtqpD0HOAk21+oO5eIsUoBiI6Uv/BeDfwGcDjwkO32MYNnrPJgvFOp\niuFKYDVwGtWz8b9ve0mN6UWMS64BxKgkvRQ4GDiE6ojgRcA9tSbVfZ8HHqY6AvpD4INU5/+Ps53e\nLzEt5QgghiXpq1Rf+o9Sun4C37V9Z62J1UDSbbZ/tUzPAjYDL7b9s3ozixi/HAHESD4HvMP2Q3Un\nMgX8fGDC9lOSNubLP6a7HAHEiCS9iGcPj7keOK9hN4Eh6SngpwOzVA8+e5w8JTamsTwLKIZVnnmz\npsxeUl4AN7Q/D2emsz3L9u7ltZvtHVqm8+Uf01KOAGJYkq4H/sj2LW3xhcBnbB9ST2YRMRlyBBAj\n2b39yx+g9HrJgOAR01wKQIxEZeSr9uAe5P9OxLSXX+IYyTnANyX9lqTdyusI4BvAp+pNLSImKtcA\nYkSSXgu8n6oXkIE7gE/a/nqtiUXEhKUAxLhIeo/tHAVETGMpADEukv7L9ovrziMixi/XAGK8mjQK\nVsSMlAIQ45VDx4hpLs8CimFJepShv+gHHoUQEdNYrgFERDRUTgFFRDRUCkBEREOlAMSMIemxlulj\nJf2npJdI+qikP5vE97lusrYVUacUgJhxJB0FnAscY/u+yd6+7VePIReVgeMjppz8x4wZRdJvAp8F\nXmv7h0O0v0PSGknfl3S5pF8o8RMk3V7i3y6x/SXdKGmdpFslLSjx1iON95Xt3SrpYyW2t6S7JF0C\n3A7Mb1vneEkXlemLJJ0r6TpJd0s6vsSfI+k8ST+QtFrSqoG2iMmSAhAzyc7A16gGav/BMMt8xfar\nbB8A3AmcUuIfBo4u8deV2LuAv7W9EOgFNrZuSNIiYAFwMLAQ+PVSgCjx82zv38FRyF7A4cBrgbNL\n7A3A3sB+wFuA3xhlGxFjlgIQM8nPqQauP2WEZX5F0n9Iug14M4NDXX4XuEjSO4BZJfY94IOSPgC8\nxPZ/t21rUXndAtwM7Ev1xQ9wn+3rO8z7a7aftn0HsGeJHQ78c4k/AFzb4bYiOpYCEDPJ08CbgIMl\nfXCYZS4CTrP9q8DHgF0AbL9NCQt4AAABPklEQVQL+BAwH7hJ0gtsf5HqaOC/gVWSjmzbloC/tL2w\nvF5u+4LS9tO2ZVtvuNmlre2Jtm1GdEUKQMwoth8HXgO8WdJQRwK7AZsl7Uh1BACApJfZvsH2h4F+\nqvP2LwXutn0ucAXwa23bugp4u6RdyzbmSnrRMKk9KOmV5YLw6zv4KN8F3liuBewJHNHBOhFjkkdB\nxIxje6ukxcC3JfW3Nf85cAPVl/wNDA5t+clykVfA1cD3gQ8Ab5H0c+AB4C/a3uebkl4JfE8SwGPA\nycBTQ6R1OnBled+1wK6jfIzLgaOoxl+4n+oU0yOjrBMxJnkURMQUJWlX249JegFwI3BYuR4QMSly\nBBAxdV0paTawE3BWvvxjsuUIICKioXIROCKioVIAIiIaKgUgIqKhUgAiIhoqBSAioqFSACIiGur/\nAx+tswcZciSzAAAAAElFTkSuQmCC\n", + "text/html": [ + "
\n", + " | Satz | \n", + "Tokens | \n", + "Klassierung | \n", + "Bemerkungen | \n", + "label | \n", + "input | \n", + "
---|---|---|---|---|---|---|
0 | \n", + "Synonyme sind: Aspartyl-Glukosaminidase-Mang... | \n", + "['synonyme', 'sind', 'aspartyl-glukosaminidase... | \n", + "LOW_QUALITY | \n", + "NaN | \n", + "LOW_QUALITY | \n", + "[[0.0, 0.04761905, 0.0, 0.0952381, 0.0, 0.0, 0... | \n", + "
1 | \n", + "Es wird also die bisherige Vermeidung aufgegeb... | \n", + "['es', 'wird', 'also', 'die', 'bisherige', 've... | \n", + "SENTENCE | \n", + "NaN | \n", + "SENTENCE | \n", + "[[0.04, 0.04, 0.04, 0.08, 0.04, 0.04, 0.04, 0.... | \n", + "
2 | \n", + "übersicht Wien Klin Wochenschr (2009) 121: 54... | \n", + "['übersicht', 'wien', 'klin', 'wochenschr', '2... | \n", + "REFERENCE | \n", + "NaN | \n", + "REFERENCE | \n", + "[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0153846... | \n", + "
3 | \n", + "Aschoff4 · L. Kinzl1 1 Zentrum für Chirurgie, ... | \n", + "['aschoff4', '', 'l', 'kinzl1', '1', 'zentrum'... | \n", + "LOW_QUALITY | \n", + "NaN | \n", + "LOW_QUALITY | \n", + "[[0.0, 0.0, 0.0, 0.014285714, 0.0, 0.0, 0.0, 0... | \n", + "
4 | \n", + "Morgenland – Abendland. | \n", + "['morgenland', 'abendland'] | \n", + "LOW_QUALITY | \n", + "NaN | \n", + "LOW_QUALITY | \n", + "[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... | \n", + "
0 | \n", - "Durch die Ausnutzung der Schwerkraft kann di... | \n", - "['durch', 'die', 'ausnutzung', 'der', 'schwerk... | \n", - "SENTENCE | \n", + "Synonyme sind: Aspartyl-Glukosaminidase-Mang... | \n", + "['synonyme', 'sind', 'aspartyl-glukosaminidase... | \n", + "LOW_QUALITY | \n", "NaN | \n", - "SENTENCE | \n", - "[[0.0384615, 0.0769231, 0.0384615, 0.0384615, ... | \n", + "LOW_QUALITY | \n", + "[[0.0, 0.04761905, 0.0, 0.0952381, 0.0, 0.0, 0... | \n", "
---|---|---|---|---|---|---|---|---|---|---|---|
1 | \n", - "Er ist vor allem mit seinen Arbeiten zum Thema... | \n", - "['er', 'ist', 'vor', 'allem', 'mit', 'seinen',... | \n", + "Es wird also die bisherige Vermeidung aufgegeb... | \n", + "['es', 'wird', 'also', 'die', 'bisherige', 've... | \n", "SENTENCE | \n", "NaN | \n", "SENTENCE | \n", - "[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0588235... | \n", + "[[0.04, 0.04, 0.04, 0.08, 0.04, 0.04, 0.04, 0.... | \n", "||
2 | \n", - "An anderen [[Monoamin]]transportern wirken sie... | \n", - "['an', 'anderen', 'monoamin', 'transportern', ... | \n", - "SENTENCE | \n", + "übersicht Wien Klin Wochenschr (2009) 121: 54... | \n", + "['übersicht', 'wien', 'klin', 'wochenschr', '2... | \n", + "REFERENCE | \n", "NaN | \n", - "SENTENCE | \n", - "[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... | \n", + "REFERENCE | \n", + "[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0153846... | \n", "
3 | \n", - "Etwa die Hälfte der an einem Schlaganfall-Erkr... | \n", - "['etwa', 'die', 'hälfte', 'der', 'an', 'einem'... | \n", + "Aschoff4 · L. Kinzl1 1 Zentrum für Chirurgie, ... | \n", + "['aschoff4', '', 'l', 'kinzl1', '1', 'zentrum'... | \n", "LOW_QUALITY | \n", "NaN | \n", "LOW_QUALITY | \n", - "[[0.0, 0.0294118, 0.0, 0.0294118, 0.0, 0.0, 0.... | \n", + "[[0.0, 0.0, 0.0, 0.014285714, 0.0, 0.0, 0.0, 0... | \n", "||
4 | \n", - "Schwerpunktthema: Was ist gesichert in der The... | \n", - "['schwerpunktthema', 'was', 'ist', 'gesichert'... | \n", - "TITLE | \n", + "Morgenland – Abendland. | \n", + "['morgenland', 'abendland'] | \n", + "LOW_QUALITY | \n", "NaN | \n", - "TITLE | \n", - "[[0.0, 0.0, 0.0, 0.142857, 0.0, 0.0, 0.0, 0.0,... | \n", + "LOW_QUALITY | \n", + "[[0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0, 0.0,... | \n", "